sourcegraph · camdencheek · Jan 17, 2025
diff --git a/agent/src/cli/command-bench/strategy-chat-context.ts b/agent/src/cli/command-bench/strategy-chat-context.ts
@@ -88,7 +88,9 @@ async function runContextCommand(
     const repoIDNamesCache = new Map<string, string>()
 
     for (const example of examples) {
+        const start = Date.now()
         const { targetRepoRevs, query: origQuery } = example
+        console.log({ query: example.query })
         const repoNames = targetRepoRevs.map(repoRev => repoRev.repoName)
 
         // Get repo IDs from cache or fetch them
@@ -108,6 +110,7 @@ async function runContextCommand(
                 uncachedRepoNames,
                 uncachedRepoNames.length + 10
             )
+            console.log('repo id names', Date.now() - start)
             if (isError(fetchedRepoIDNames)) {
                 throw new Error(
                     `getRepoIds failed for [${uncachedRepoNames.join(',')}]: ${fetchedRepoIDNames}`
@@ -135,6 +138,7 @@ async function runContextCommand(
                 completionsClient,
                 PromptString.unsafe_fromUserQuery(origQuery)
             )
+            console.log('rewrote keyword query', Date.now() - start)
         }
 
         const resultsResp = await graphqlClient.contextSearchEvalDebug({
@@ -144,6 +148,7 @@ async function runContextCommand(
             codeResultsCount: clientOpts.codeResultsCount,
             textResultsCount: clientOpts.textResultsCount,
         })
+        console.log('fetched context', Date.now() - start)
 
         if (isError(resultsResp)) {
             throw new Error(

diff --git a/vscode/src/local-context/rewrite-keyword-query.ts b/vscode/src/local-context/rewrite-keyword-query.ts
@@ -8,10 +8,6 @@ import {
 } from '@sourcegraph/cody-shared'
 import { outputChannelLogger } from '../output-channel-logger'
 
-import { francAll } from 'franc-min'
-
-const containsMultipleSentences = /[.!?][\s\r\n]+\w/
-
 /**
  * Rewrite the query, using the fast completions model to pull out keywords.
  *
@@ -22,85 +18,18 @@ export async function rewriteKeywordQuery(
     query: PromptString,
     signal?: AbortSignal
 ): Promise<string> {
-    // In evals, we saw that rewriting tends to make performance worse for simple queries. So we only rewrite
-    // in cases where it clearly helps: when it's likely in a non-English language, or there are multiple
-    // sentences (so we really need to distill the question).
-    const queryString = query.toString()
-    if (!containsMultipleSentences.test(queryString)) {
-        const english = francAll(queryString).find(v => v[0] === 'eng')
-        if (english && english[1] > 0.9) {
-            return queryString
-        }
-    }
-
     try {
-        const rewritten = await doRewrite(completionsClient, query, signal)
+        const rewritten = await extractKeywords(completionsClient, query, signal!)
+        console.log({ rewritten })
         return rewritten.length !== 0 ? rewritten.sort().join(' ') : query.toString()
     } catch (err) {
+        console.log({ err })
         outputChannelLogger.logDebug('rewrite-keyword-query', 'failed', { verbose: err })
         // If we fail to rewrite, just return the original query.
         return query.toString()
     }
 }
 
-async function doRewrite(
-    completionsClient: SourcegraphCompletionsClient,
-    query: PromptString,
-    signal?: AbortSignal
-): Promise<string[]> {
-    const preamble = getSimplePreamble(undefined, 0, 'Default')
-    const stream = completionsClient.stream(
-        {
-            messages: [
-                ...preamble,
-                {
-                    speaker: 'human',
-                    text: ps`You are helping the user search over a codebase. List some filename fragments that would match files relevant to read to answer the user's query. Present your results in a *single* XML list in the following format: <keywords><keyword><value>a single keyword</value><variants>a space separated list of synonyms and variants of the keyword, including acronyms, abbreviations, and expansions</variants><weight>a numerical weight between 0.0 and 1.0 that indicates the importance of the keyword</weight></keyword></keywords>. Here is the user query: <userQuery>${query}</userQuery>`,
-                },
-                { speaker: 'assistant' },
-            ],
-            maxTokensToSample: 400,
-            temperature: 0,
-            topK: 1,
-            fast: true,
-        },
-        { apiVersion: 0 }, // Use legacy API version for now
-        signal
-    )
-
-    const streamingText: string[] = []
-    for await (const message of stream) {
-        signal?.throwIfAborted()
-        switch (message.type) {
-            case 'change': {
-                streamingText.push(message.text)
-                break
-            }
-            case 'error': {
-                throw message.error
-            }
-        }
-    }
-
-    const text = streamingText.at(-1) ?? ''
-    const parser = new XMLParser()
-    const document = parser.parse(text)
-
-    const keywords: { value?: string; variants?: string; weight?: number }[] =
-        // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
-        document?.keywords?.keyword ?? []
-    const result = new Set<string>()
-    for (const { value } of keywords) {
-        if (value) {
-            for (const v of value.split(' ')) {
-                result.add(v)
-            }
-        }
-    }
-
-    return [...result]
-}
-
 /**
  * Extracts keywords from a user query by using the completions model to identify relevant search terms.
  * The function processes the query and returns an array of individual keywords that could be found
@@ -118,7 +47,7 @@ export async function extractKeywords(
                 ...preamble,
                 {
                     speaker: 'human',
-                    text: ps`You are helping the user search over a codebase. List terms that could be found literally in code snippets or file names relevant to answering the user's query. Limit your results to terms that are in the user's query. Present your results in a *single* XML list in the following format: <keywords><keyword>a single keyword</keyword></keywords>. Here is the user query: <userQuery>${query}</userQuery>`,
+                    text: ps`You are helping the user search over a codebase. List terms that could be found literally in code snippets or file names relevant to answering the user's query. Limit your results to literal terms that are in the user's query. Present your results in a *single* XML list in the following format: <keywords><keyword>a single keyword</keyword></keywords>. Here is the user query: <userQuery>${query}</userQuery>`,
                 },
                 { speaker: 'assistant' },
             ],