Skip to content

Commit

Permalink
refactor(search-service): enhance summary handling and localization updates
Browse files Browse the repository at this point in the history

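- Updated the `apply` function in `index.ts` to resolve a per-tool `summaryType` (taken from `params['summaryType']`, falling back to `config.summaryType`), which now selects the `PuppeteerBrowserTool` page-load options (`waitUntil`, `timeout`) and is passed on to `SearchTool`.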
- Enhanced the `SearchTool` class to accept `summaryType` as a parameter, refining how search results are processed based on user-defined summary preferences.
- Added a new `summaryPrompt` entry in the English and Chinese localization files to support document summarization features.
- Improved text processing in `PuppeteerBrowserTool` to produce cleaner output by collapsing every run of whitespace (spaces, tabs, and newlines) into a single space.
- These changes aim to improve the configurability and effectiveness of the search service, enhancing user experience through better summary handling and localization support.
  • Loading branch information
dingyi222666 committed Dec 13, 2024
1 parent c084ef3 commit c9d128f
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 17 deletions.
10 changes: 7 additions & 3 deletions packages/search-service/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import { SearchManager } from './provide'
import { providerPlugin } from './plugin'
import { SearchTool } from './tools/search'
import { SummaryType } from './types'

export let logger: Logger

export function apply(ctx: Context, config: Config) {
Expand All @@ -39,18 +40,20 @@ export function apply(ctx: Context, config: Config) {

plugin.registerTool('web-search', {
async createTool(params, session) {
const summaryType: SummaryType =
params['summaryType'] ?? config.summaryType
const model = summaryModel ?? params.model
const browserTool = new PuppeteerBrowserTool(
ctx,
model,
params.embeddings,
{
waitUntil:
config.summaryType === SummaryType.Balanced
summaryType === SummaryType.Balanced
? 'domcontentloaded'
: 'networkidle2',
timeout:
config.summaryType === SummaryType.Balanced
summaryType === SummaryType.Balanced
? 8 * Time.second
: 30 * Time.second,
idleTimeout: 3 * Time.minute
Expand All @@ -60,7 +63,8 @@ export function apply(ctx: Context, config: Config) {
searchManager,
browserTool,
params.embeddings,
model
model,
summaryType
)
},
selector() {
Expand Down
1 change: 1 addition & 0 deletions packages/search-service/src/locales/en-US.schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,4 @@ $inner:
- $desc: 'Advanced Settings'
searchPrompt: 'Search prompt. Used for summarizing search results.'
newQuestionPrompt: 'New question prompt. Used for generating new questions.'
summaryPrompt: 'Summary prompt. Used for summarizing documents.'
1 change: 1 addition & 0 deletions packages/search-service/src/locales/zh-CN.schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,4 @@ $inner:
- $desc: 进阶设置
searchPrompt: 搜索提示词。用于对搜索结果总结的 Prompt。
newQuestionPrompt: 新问题提示词。用于生成新问题的 Prompt。
summaryPrompt: 文档总结提示词。用于总结文档的 Prompt。
1 change: 1 addition & 0 deletions packages/search-service/src/tools/puppeteerBrowserTool.ts
Original file line number Diff line number Diff line change
Expand Up @@ -716,6 +716,7 @@ export class PuppeteerBrowserTool extends StructuredTool {
.trim()
.replace(/\n{3,}/g, '\n\n')
.trim()
.replace(/\s+/g, ' ')
} catch (error) {
console.error(error)
return `Error getting page text: ${error.message}`
Expand Down
40 changes: 26 additions & 14 deletions packages/search-service/src/tools/search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import { SearchResult, SummaryType } from '../types'
import { ChatLunaChatModel } from 'koishi-plugin-chatluna/llm-core/platform/model'
import { PromptTemplate } from '@langchain/core/prompts'
import { getMessageContent } from 'koishi-plugin-chatluna/utils/string'
import fs from 'fs/promises'
/* import fs from 'fs/promises' */

export class SearchTool extends Tool {
name = 'web_search'
Expand All @@ -27,15 +27,16 @@ export class SearchTool extends Tool {
private searchManager: SearchManager,
private browserTool: PuppeteerBrowserTool,
private embeddings: Embeddings,
private llm: ChatLunaChatModel
private llm: ChatLunaChatModel,
private summaryType: SummaryType
) {
super({})
}

async _call(arg: string): Promise<string> {
const documents = await this.fetchSearchResult(arg)

if (this.searchManager.config.summaryType === SummaryType.Speed) {
if (this.summaryType !== SummaryType.Balanced) {
return JSON.stringify(
documents.map((document) =>
Object.assign({}, document.metadata as SearchResult, {
Expand All @@ -46,6 +47,7 @@ export class SearchTool extends Tool {
}

const fakeSearchResult = await generateFakeSearchResult(arg, this.llm)

return JSON.stringify(
await this._reRankDocuments(
getMessageContent(fakeSearchResult.content),
Expand All @@ -57,7 +59,7 @@ export class SearchTool extends Tool {
private async fetchSearchResult(query: string) {
const results = await this.searchManager.search(query)

if (this.searchManager.config.summaryType === SummaryType.Quality) {
if (this.summaryType === SummaryType.Quality) {
return await Promise.all(
results.map(async (result, k) => {
let pageContent = result.description
Expand All @@ -66,12 +68,16 @@ export class SearchTool extends Tool {
const browserContent: string =
await this.browserTool.invoke({
url: result.url,
action: 'summary',
action: 'summarize',
params: query
})

console.log(browserContent)
if (
!browserContent.includes('Error getting page text:')
!browserContent.includes(
'Error getting page text:'
) &&
browserContent !== '[none]'
) {
pageContent = browserContent
}
Expand All @@ -96,9 +102,7 @@ export class SearchTool extends Tool {
return chunks
})
).then((documents) => documents.flat())
} else if (
this.searchManager.config.summaryType === SummaryType.Balanced
) {
} else if (this.summaryType === SummaryType.Balanced) {
return await Promise.all(
results.map(async (result, k) => {
let pageContent = result.description
Expand Down Expand Up @@ -154,12 +158,12 @@ export class SearchTool extends Tool {

const searchResult = await vectorStore.similaritySearchWithScore(
query,
this.searchManager.config.topK * 3
this.searchManager.config.topK * 2
)

for (const [index, result] of searchResult.entries()) {
/* for (const [index, result] of searchResult.entries()) {
await fs.writeFile(`tmp/tmp-${index}.txt`, result[0].pageContent)
}
} */

return searchResult
.filter(
Expand All @@ -184,7 +188,15 @@ export async function generateFakeSearchResult(
}

const GENERATE_FAKE_SEARCH_RESULT_PROMPT = new PromptTemplate({
template:
'Generate a fake search result for the query: "{query}". The response should be relevant to web content, concise, and between 50 to 100 characters long.',
template: `Based on the question: "{query}"
Generate a brief, factual answer that:
- Directly addresses the core question
- Uses clear and concise language
- Stays between 50-100 characters
- Contains key factual information
- Avoids speculation or uncertainty
Answer the question as if you are a search result snippet.`,
inputVariables: ['query']
})

0 comments on commit c9d128f

Please sign in to comment.