
[Obs AI Assistant] Improve recall speed #176428

Merged · 16 commits · Feb 8, 2024
Changes from 7 commits
106 changes: 47 additions & 59 deletions x-pack/plugins/observability_ai_assistant/server/functions/recall.ts
@@ -9,7 +9,7 @@ import { decodeOrThrow, jsonRt } from '@kbn/io-ts-utils';
import type { Serializable } from '@kbn/utility-types';
import dedent from 'dedent';
import * as t from 'io-ts';
import { last, omit } from 'lodash';
import { compact, last, omit } from 'lodash';
import { lastValueFrom } from 'rxjs';
import { FunctionRegistrationParameters } from '.';
import { MessageRole, type Message } from '../../common/types';
@@ -87,12 +87,17 @@ export function registerRecallFunction({
messages.filter((message) => message.message.role === MessageRole.User)
@sorenlouv (Member) · Feb 8, 2024

nit: this is a good use case for findLast (well-supported and not nearly used enough :D )

Suggested change
messages.filter((message) => message.message.role === MessageRole.User)
messages.findLast((message) => message.message.role === MessageRole.User)

@dgieselaar (Member, Author) · Feb 8, 2024

I did not know this existed 😄. Unfortunately, TS doesn't know about it either (??)

@sorenlouv (Member) · Feb 8, 2024

unfortunately TS doesn't know about it either (??)

Argh, I see that now. Odd since CanIUse reports mainstream support in all browsers we care about since at least 2022.

Edit: seems like we need to wait until TypeScript 5.
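
(As an aside: until the TypeScript lib target catches up, a minimal sketch of a typed equivalent using lodash's findLast, which this file already imports other helpers from; the helper name below is illustrative only.)

```ts
import { findLast } from 'lodash';
import { MessageRole, type Message } from '../../common/types';

// Behaves like Array.prototype.findLast, but with typings that work on current TS.
const lastUserMessage = (messages: Message[]): Message | undefined =>
  findLast(messages, (message) => message.message.role === MessageRole.User);
```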

);

const nonEmptyQueries = queries.filter(Boolean);
dgieselaar marked this conversation as resolved.

const queriesOrUserPrompt = nonEmptyQueries.length
? nonEmptyQueries
: compact([userMessage?.message.content]);
Member

Why not always filter by the user query combined with the query from the LLM? Is that too restrictive?

Member Author

What I'd like to investigate is having the LLM decide when to recall (other than for the first message). E.g., if somebody asks "what does the following error mean: ..." and then "what are the consequences of this error", the latter doesn't really need a recall. If they ask "how does this affect my checkout service", it does. The LLM should be able to classify this, and rewrite the question in a way that does not require us to send over the entire conversation. In that case, the query it chooses should be the only thing we use. So, I'm preparing for that future.
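
(Purely illustrative, a rough sketch of what that future classification step might look like; none of these names exist in the codebase, and the shape is only an assumption.)

```ts
// Hypothetical shape of an LLM-driven recall decision: the model decides whether a
// recall is needed at all and, if so, rewrites the question into a self-contained
// query so the whole conversation no longer has to be sent along.
interface RecallDecision {
  shouldRecall: boolean;
  rewrittenQuery?: string;
}

async function queriesForRecall(
  userPrompt: string,
  classify: (prompt: string) => Promise<RecallDecision>
): Promise<string[]> {
  const decision = await classify(userPrompt);
  if (!decision.shouldRecall) {
    return [];
  }
  return [decision.rewrittenQuery ?? userPrompt];
}
```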


const suggestions = await retrieveSuggestions({
userMessage,
client,
signal,
categories,
queries,
queries: queriesOrUserPrompt,
});

resources.logger.debug(`Received ${suggestions.length} suggestions`);
@@ -107,9 +112,8 @@ export function registerRecallFunction({

const relevantDocuments = await scoreSuggestions({
suggestions,
systemMessage,
userMessage,
queries,
queries: queriesOrUserPrompt,
messages,
client,
connectorId,
signal,
@@ -126,25 +130,17 @@
}

async function retrieveSuggestions({
userMessage,
queries,
client,
categories,
signal,
}: {
userMessage?: Message;
queries: string[];
client: ObservabilityAIAssistantClient;
categories: Array<'apm' | 'lens'>;
signal: AbortSignal;
}) {
const queriesWithUserPrompt =
userMessage && userMessage.message.content
? [userMessage.message.content, ...queries]
: queries;

const recallResponse = await client.recall({
queries: queriesWithUserPrompt,
queries,
categories,
});

@@ -161,50 +157,42 @@ const scoreFunctionRequestRt = t.type({
});

const scoreFunctionArgumentsRt = t.type({
scores: t.array(
t.type({
id: t.string,
score: t.number,
})
),
scores: t.string,
});

async function scoreSuggestions({
suggestions,
systemMessage,
userMessage,
messages,
queries,
client,
connectorId,
signal,
}: {
suggestions: Awaited<ReturnType<typeof retrieveSuggestions>>;
systemMessage: Message;
userMessage?: Message;
messages: Message[];
queries: string[];
client: ObservabilityAIAssistantClient;
connectorId: string;
signal: AbortSignal;
}) {
const systemMessageExtension =
dedent(`You have the function called score available to help you inform the user about how relevant you think a given document is to the conversation.
Please give a score between 1 and 7, fractions are allowed.
A higher score means it is more relevant.`);
const extendedSystemMessage = {
...systemMessage,
message: {
...systemMessage.message,
content: `${systemMessage.message.content}\n\n${systemMessageExtension}`,
},
};

const userMessageOrQueries =
userMessage && userMessage.message.content ? userMessage.message.content : queries.join(',');
const indexedSuggestions = suggestions.map((suggestion, index) => ({ ...suggestion, id: index }));

const newUserMessageContent =
dedent(`Given the question "${userMessageOrQueries}", can you give me a score for how relevant the following documents are?
dedent(`Given the following question, score the documents that are relevant to the question. on a scale from 0 to 7,
0 being completely relevant, and 10 being extremely relevant. Information is relevant to the question if it helps in
@sorenlouv (Member) · Feb 8, 2024

I've wondered about this before: why is the scoring interval 0-7? Is it something magic?
Btw, it looks like you actually expect a score between 0 and 10:

Suggested change
dedent(`Given the following question, score the documents that are relevant to the question. on a scale from 0 to 7,
0 being completely relevant, and 10 being extremely relevant. Information is relevant to the question if it helps in
dedent(`Given the following question, score the documents that are relevant to the question. on a scale from 0 to 10,
0 being completely relevant, and 10 being extremely relevant. Information is relevant to the question if it helps in

Member Author

@miltonhultgren any thoughts here on 0 to 7 vs 10?

answering the question. Judge it according to the following criteria:

- The document is relevant to the question, and the rest of the conversation
- The document has information relevant to the question that is not mentioned,
or more detailed than what is available in the conversation
- The document has a high amount of information relevant to the question compared to other documents
- The document contains new information not mentioned before in the conversation

${JSON.stringify(suggestions, null, 2)}`);
Question:
${queries.join('\n')}

Documents:
${JSON.stringify(indexedSuggestions, null, 2)}`);

const newUserMessage: Message = {
'@timestamp': new Date().toISOString(),
@@ -223,22 +211,13 @@ async function scoreSuggestions({
additionalProperties: false,
properties: {
scores: {
description: 'The document IDs and their scores',
type: 'array',
items: {
type: 'object',
additionalProperties: false,
properties: {
id: {
description: 'The ID of the document',
type: 'string',
},
score: {
description: 'The score for the document',
type: 'number',
},
},
},
description: `The document IDs and their scores, as CSV. Example:

my_id,7
my_other_id,3
my_third_id,4
`,
type: 'string',
},
},
required: ['score'],
@@ -250,18 +229,27 @@
(
await client.chat('score_suggestions', {
connectorId,
messages: [extendedSystemMessage, newUserMessage],
messages: [...messages.slice(-1), newUserMessage],
@sorenlouv (Member) · Feb 8, 2024

nit: Can we use last instead? (my head spins when doing array math)

Suggested change
messages: [...messages.slice(-1), newUserMessage],
messages: [last(messages), newUserMessage],

Member Author

It's a typo 😄. It's supposed to be "everything except the last", so .slice(0, -1). Will correct.
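
(For reference, a tiny sketch of the difference between the two slice calls:)

```ts
const msgs = ['system', 'user', 'assistant', 'function'];

msgs.slice(-1);    // ['function']                    -> only the last element (the typo)
msgs.slice(0, -1); // ['system', 'user', 'assistant'] -> everything except the last (the intent)
```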

functions: [scoreFunction],
functionCall: 'score',
signal,
})
).pipe(concatenateChatCompletionChunks())
);
const scoreFunctionRequest = decodeOrThrow(scoreFunctionRequestRt)(response);
const { scores } = decodeOrThrow(jsonRt.pipe(scoreFunctionArgumentsRt))(
const { scores: scoresAsString } = decodeOrThrow(jsonRt.pipe(scoreFunctionArgumentsRt))(
scoreFunctionRequest.message.function_call.arguments
);

const scores = scoresAsString.split('\n').map((line) => {
const [index, score] = line
.split(',')
.map((value) => value.trim())
.map(Number);

return { id: suggestions[index].id, score };
});
Comment on lines +241 to +248
@sorenlouv (Member) · Feb 8, 2024

How confident are we that this is the format the LLM will respond with (seeing as we'll support multiple LLMs soon)? Should we handle the case where the LLM responds in a non-CSV format?

Member Author

I'll have a look in the Bedrock PR, but I don't think we should expect too much from, or invest too much in, LLMs other than OpenAI until there is one with similar performance.
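
(If other models do turn out to deviate from the format, a hedged sketch of a more forgiving parser; parseScores is a hypothetical helper, not something in this PR.)

```ts
// Skip any line that does not look like "<index>,<score>" instead of producing
// NaN scores, e.g. when a model wraps the CSV in prose.
function parseScores(scoresAsString: string): Array<{ index: number; score: number }> {
  return scoresAsString
    .split('\n')
    .map((line) => line.split(',').map((value) => Number(value.trim())))
    .filter(([index, score]) => Number.isInteger(index) && Number.isFinite(score))
    .map(([index, score]) => ({ index, score }));
}
```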


if (scores.length === 0) {
return [];
}
@@ -14,7 +14,15 @@ import apm from 'elastic-apm-node';
import { decode, encode } from 'gpt-tokenizer';
import { compact, isEmpty, last, merge, noop, omit, pick, take } from 'lodash';
import type OpenAI from 'openai';
import { filter, isObservable, lastValueFrom, Observable, shareReplay, toArray } from 'rxjs';
import {
filter,
firstValueFrom,
isObservable,
lastValueFrom,
Observable,
shareReplay,
toArray,
} from 'rxjs';
import { Readable } from 'stream';
import { v4 } from 'uuid';
import {
@@ -449,6 +457,8 @@
): Promise<Observable<ChatCompletionChunkEvent>> => {
const span = apm.startSpan(`chat ${name}`);

const spanId = (span?.ids['span.id'] || '').substring(0, 6);

const messagesForOpenAI: Array<
Omit<OpenAI.ChatCompletionMessageParam, 'role'> & {
role: MessageRole;
@@ -484,6 +494,8 @@
this.dependencies.logger.debug(`Sending conversation to connector`);
this.dependencies.logger.trace(JSON.stringify(request, null, 2));

const now = performance.now();

const executeResult = await this.dependencies.actionsClient.execute({
actionId: connectorId,
params: {
@@ -495,7 +507,11 @@
},
});

this.dependencies.logger.debug(`Received action client response: ${executeResult.status}`);
this.dependencies.logger.debug(
`Received action client response: ${executeResult.status} (took: ${Math.round(
performance.now() - now
)}ms)${spanId ? ` (${spanId})` : ''}`
);

if (executeResult.status === 'error' && executeResult?.serviceMessage) {
const tokenLimitRegex =
@@ -518,20 +534,37 @@

const observable = streamIntoObservable(response).pipe(processOpenAiStream(), shareReplay());

if (span) {
lastValueFrom(observable)
.then(
() => {
span.setOutcome('success');
},
() => {
span.setOutcome('failure');
}
)
.finally(() => {
span.end();
});
}
firstValueFrom(observable)
.then(
() => {},
() => {}
)
@sorenlouv (Member) · Feb 8, 2024

Is this then() necessary?

Member Author

No, I'm just doing this to catch the error so it doesn't result in an unhandled promise rejection. But I don't need a then(), I can just use a catch().
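
(A sketch of how that reads in the surrounding code, assuming the noop already imported from lodash in this file:)

```ts
firstValueFrom(observable)
  // Swallow the rejection only to avoid an unhandled promise rejection; the
  // failure outcome is still recorded on the span by the lastValueFrom block below.
  .catch(noop)
  .finally(() => {
    this.dependencies.logger.debug(
      `Received first value after ${Math.round(performance.now() - now)}ms${
        spanId ? ` (${spanId})` : ''
      }`
    );
  });
```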

.finally(() => {
this.dependencies.logger.debug(
`Received first value after ${Math.round(performance.now() - now)}ms${
spanId ? ` (${spanId})` : ''
@klacabane (Contributor) · Feb 8, 2024

Are these intermediate metrics only available in logs, or can we somehow store them in the span?

Member Author

We can, as labels, but I'm not sure it has tremendous value. Adding labels changes the mapping, so I'm a little wary of adding more. Let's see if we need it.
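
(For completeness, a hedged sketch of what putting the timing on the span could look like with elastic-apm-node's label API, if it ever proves worth the mapping cost:)

```ts
// Attach time-to-first-value as a numeric label on the APM span. Every new
// label key extends the index mapping, hence the hesitation above.
if (span) {
  span.addLabels({ time_to_first_value_ms: Math.round(performance.now() - now) });
}
```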

}`
);
});

lastValueFrom(observable)
.then(
() => {
span?.setOutcome('success');
},
() => {
span?.setOutcome('failure');
}
)
.finally(() => {
this.dependencies.logger.debug(
`Completed response in ${Math.round(performance.now() - now)}ms${
spanId ? ` (${spanId})` : ''
}`
);

span?.end();
});

return observable;
};