-
Notifications
You must be signed in to change notification settings - Fork 0
/
summarizer.ts
152 lines (128 loc) · 3.77 KB
/
summarizer.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import OpenAI from "openai"
/**
 * Summarize `text` with OpenAI, transparently map-reducing inputs that are
 * too large for a single request: oversized text is split into word-based
 * chunks, each chunk is summarized independently, and the chunk summaries
 * are summarized once more into a final result.
 *
 * @param text - Input text to summarize; must be non-empty.
 * @param apiKey - OpenAI API key; must be non-empty.
 * @param maxWords - Chunk size in words (default 12 000); exposed so
 *   callers can tune it for models with different context windows.
 * @returns The final summary text.
 * @throws Error on empty input, an invalid key, or an OpenAI failure.
 */
export async function extractKeyPoints(
  text: string,
  apiKey: string,
  maxWords = 12_000
): Promise<string> {
  if (!text?.trim()) {
    throw new Error("Empty or invalid input text")
  }
  if (!apiKey?.trim()) {
    throw new Error("Invalid API key")
  }
  const words = text.split(/\s+/)
  // Small enough to summarize in a single request
  if (words.length <= maxWords) {
    return await summarizeText(text, apiKey)
  }
  // Map step: split into word-based chunks (slice clamps the end index,
  // so no explicit Math.min is needed)
  const chunks: string[] = []
  for (let i = 0; i < words.length; i += maxWords) {
    chunks.push(words.slice(i, i + maxWords).join(" "))
  }
  // The chunk requests are independent — run them in parallel instead of
  // awaiting one at a time; Promise.all preserves chunk order
  const summaries: string[] = await Promise.all(
    chunks.map((chunk) => summarizeText(chunk, apiKey))
  )
  // Reduce step: combine the per-chunk summaries into one final summary
  if (summaries.length > 1) {
    return await summarizeText(summaries.join("\n\n"), apiKey)
  }
  return summaries[0]
}
/**
 * Request a summary of `text` from OpenAI's chat-completions API and return
 * the portion of the response wrapped in <summary>…</summary> tags.
 *
 * @param text - Text to summarize.
 * @param apiKey - OpenAI API key.
 * @returns The trimmed content between the summary tags.
 * @throws Error wrapping any failure: API error, empty response, or a
 *   response that lacks the expected summary tags.
 */
async function summarizeText(text: string, apiKey: string): Promise<string> {
  try {
    const openai = new OpenAI({ apiKey })
    // Word count drives both the prompt wording and the token budget
    const wordCount = text.split(/\s+/).length
    const prompt = makePrompt(wordCount)
    const response = await openai.chat.completions.create({
      model: "gpt-4o-mini",
      messages: [
        { role: "system", content: prompt },
        { role: "user", content: text }
      ],
      // Low temperature for faithful, repeatable summaries
      temperature: 0.2,
      max_tokens: getMaxTokens(wordCount),
      presence_penalty: -0.2,
      frequency_penalty: 0.3
    })
    // Fix: `choices` can be empty, so choices[0] must not be dereferenced
    // unchecked — optional chaining routes that case into the existing
    // empty-response error below
    const summary = response.choices[0]?.message?.content
    if (!summary) {
      throw new Error("OpenAI gave empty response")
    }
    // Extract only the text between <summary> tags (/s lets . span newlines)
    const match = summary.match(/<summary>(.*?)<\/summary>/s)
    if (!match) {
      throw new Error("OpenAI response missing summary tags")
    }
    return match[1].trim()
  } catch (error) {
    // Normalize everything to a single Error with context for callers
    throw new Error(
      "OpenAI error: " +
        (error instanceof Error ? error.message : "Unknown error")
    )
  }
}
/**
 * Build the system prompt for a given input size.
 *
 * All prompts instruct the model to wrap its entire answer in
 * <summary>…</summary> tags, because `summarizeText` extracts ONLY the
 * tagged span. (Fix: the long-text prompt previously asked for a
 * "KEY POINTS:" section *outside* the tags, so the key points were
 * generated and then discarded by the extraction regex.)
 *
 * @param wordCount - Number of words in the text being summarized.
 * @returns A system prompt tailored to short (<100), medium (100–500),
 *   or long (>500 word) input.
 */
function makePrompt(wordCount: number): string {
  // Long text: key points plus a short overview, all inside the tags
  if (wordCount > 500) {
    return `Summarize this long text. Give key points and 3-4 sentence overview.
Format like this:
<summary>
KEY POINTS:
• [point 1]
• [point 2]
...
[overview]
</summary>
Rules:
- Keep important details and numbers
- Use same style and terms as original
- Only include what's in the text
- Focus on main points`
  }
  // Medium text: bulleted main points
  if (wordCount >= 100) {
    return `Summarize this medium text. Give main points.
Format like this:
<summary>
• [point 1]
• [point 2]
</summary>
Rules:
- Keep important details and numbers
- Use same style and terms as original
- Only include what's in the text
- Focus on main points`
  }
  // Short text: a single sentence
  return `Summarize this short text in one sentence.
Format like this:
<summary>[summary]</summary>
Rules:
- Keep important details and numbers
- Use same style and terms as original
- Only include what's in the text
- Focus on main point`
}
/**
 * Compute the completion token budget from the input word count.
 *
 * Inputs under 200 words get a generous 0.75 tokens-per-word ratio so short
 * texts still yield a quality summary; longer inputs use 0.5 to keep the
 * response concise. The result is capped at 2000 tokens to stay within API
 * limits and avoid needlessly long answers.
 *
 * @param wordCount - Number of words in the text being summarized.
 * @returns Token budget, always at most 2000.
 */
function getMaxTokens(wordCount: number): number {
  // Hard ceiling on any response
  const TOKEN_CAP = 2000
  let tokensPerWord: number
  if (wordCount < 200) {
    tokensPerWord = 0.75
  } else {
    tokensPerWord = 0.5
  }
  const budget = Math.ceil(wordCount * tokensPerWord)
  return budget > TOKEN_CAP ? TOKEN_CAP : budget
}