generated from ubiquity/ts-template
-
Notifications
You must be signed in to change notification settings - Fork 29
/
content-evaluator-module.ts
297 lines (260 loc) · 11.8 KB
/
content-evaluator-module.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
import { Value } from "@sinclair/typebox/value";
import Decimal from "decimal.js";
import { encodingForModel } from "js-tiktoken";
import OpenAI from "openai";
import { commentEnum, CommentKind, CommentType } from "../configuration/comment-types";
import { ContentEvaluatorConfiguration } from "../configuration/content-evaluator-config";
import { IssueActivity } from "../issue-activity";
import {
AllComments,
CommentToEvaluate,
openAiRelevanceResponseSchema,
PrCommentToEvaluate,
Relevances,
} from "../types/content-evaluator-module-type";
import { BaseModule } from "../types/module";
import { ContextPlugin } from "../types/plugin-input";
import { GithubCommentScore, Result } from "../types/results";
/**
* Evaluates and rates comments.
*/
export class ContentEvaluatorModule extends BaseModule {
  readonly _configuration: ContentEvaluatorConfiguration | null = this.context.config.incentives.contentEvaluator;
  readonly _openAi = new OpenAI({
    apiKey: this.context.env.OPENAI_API_KEY,
    // Allow targeting a custom OpenAI-compatible endpoint when one is configured.
    ...(this._configuration?.openAi.endpoint && { baseURL: this._configuration.openAi.endpoint }),
  });
  // Comment-type bitmask -> relevance configured via `multipliers`.
  // Comments matching an entry here skip the AI evaluation entirely.
  private readonly _fixedRelevances: { [k: string]: number } = {};

  /**
   * Converts a comment-type key such as "ISSUE_SPECIFICATION" into its combined
   * numeric bitmask by OR-ing the enum value of each underscore-separated part.
   */
  _getEnumValue(key: CommentType) {
    let result = 0;
    for (const part of key.split("_")) {
      result |= Number(commentEnum[part as keyof typeof commentEnum]);
    }
    return result;
  }

  constructor(context: ContextPlugin) {
    super(context);
    // Pre-compute the fixed-relevance table from the configured multipliers.
    // Each multiplier's roles are OR-ed into a single bitmask key.
    for (const multiplier of this._configuration?.multipliers ?? []) {
      const typeMask = multiplier.role.reduce((mask, role) => mask | this._getEnumValue(role), 0);
      this._fixedRelevances[typeMask] = multiplier.relevance;
    }
  }

  get enabled(): boolean {
    if (!this._configuration) {
      this.context.logger.error("Invalid / missing configuration detected for ContentEvaluatorModule, disabling.");
      return false;
    }
    return true;
  }

  /**
   * Attaches a relevance score and a relevance-adjusted reward to every comment
   * in `result`. All users' comment batches are evaluated concurrently.
   */
  async transform(data: Readonly<IssueActivity>, result: Result) {
    // Flatten the raw comment list into the minimal shape the prompt needs,
    // skipping comments whose author information is unavailable.
    const allComments: { id: number; comment: string; author: string }[] = [];
    for (const commentObj of data.allComments || []) {
      if (commentObj.user) {
        allComments.push({ id: commentObj.id, comment: commentObj.body ?? "", author: commentObj.user.login });
      }
    }
    const promises: Promise<GithubCommentScore[]>[] = [];
    for (const key of Object.keys(result)) {
      const currentElement = result[key];
      const comments = currentElement.comments ?? [];
      const specificationBody = data.self?.body;
      // Without an issue specification there is nothing to evaluate against.
      if (specificationBody && comments.length) {
        promises.push(
          this._processComment(comments, specificationBody, allComments).then(
            (commentsWithScore) => (currentElement.comments = commentsWithScore)
          )
        );
      }
    }
    await Promise.all(promises);
    return result;
  }

  /**
   * Re-weights the regex-derived portion of a comment's reward by `relevance`:
   * the raw regex reward is subtracted and its relevance-scaled value added back.
   * NOTE(review): the guard uses truthiness, so a formatting/multiplier/words
   * value of 0 skips re-weighting — presumably intentional; confirm.
   */
  _getRewardForComment(comment: GithubCommentScore, relevance: number) {
    let reward = new Decimal(comment?.score?.reward ?? 0);
    if (comment?.score?.formatting && comment.score.multiplier && comment.score.words) {
      const regexReward = new Decimal(comment.score.words.result).mul(comment.score.multiplier);
      reward = reward.sub(regexReward).add(regexReward.mul(relevance));
    }
    return reward;
  }

  /**
   * Evaluates a user's comments, assigning each a relevance (fixed, AI-derived,
   * or a default of 1) and recomputing its reward accordingly.
   *
   * @throws when the AI returns a different number of relevances than requested.
   */
  async _processComment(comments: Readonly<GithubCommentScore>[], specificationBody: string, allComments: AllComments) {
    const commentsWithScore: GithubCommentScore[] = [...comments];
    const { commentsToEvaluate, prCommentsToEvaluate } = this._splitCommentsByPrompt(commentsWithScore);
    const relevancesByAi = await this._evaluateComments(
      specificationBody,
      commentsToEvaluate,
      allComments,
      prCommentsToEvaluate
    );
    // Every comment sent for evaluation must come back with a relevance score.
    if (Object.keys(relevancesByAi).length !== commentsToEvaluate.length + prCommentsToEvaluate.length) {
      throw this.context.logger.error("Relevance / Comment length mismatch!", {
        relevancesByAi,
        commentsToEvaluate,
        prCommentsToEvaluate,
      });
    }
    for (const currentComment of commentsWithScore) {
      let currentRelevance = 1; // Fallback for comments missed by the OpenAI evaluation
      const fixedRelevance = this._fixedRelevances[currentComment.type];
      const aiRelevance = relevancesByAi[currentComment.id];
      // Fix: compare against undefined so a configured fixed relevance of 0 is
      // honored (a truthiness check would silently ignore it).
      if (fixedRelevance !== undefined) {
        currentRelevance = fixedRelevance;
      } else if (typeof aiRelevance === "number" && !Number.isNaN(aiRelevance)) {
        currentRelevance = aiRelevance;
      }
      const currentReward = this._getRewardForComment(currentComment, currentRelevance).mul(
        currentComment.score?.priority ?? 1
      );
      currentComment.score = {
        ...(currentComment.score || { multiplier: 0 }),
        relevance: new Decimal(currentRelevance).toNumber(),
        priority: currentComment.score?.priority ?? 1,
        reward: currentReward.toNumber(),
      };
    }
    return commentsWithScore;
  }

  /**
   * Will try to predict the maximum of tokens expected, to a maximum of totalTokenLimit.
   * NOTE(review): the tokenizer is pinned to "gpt-4o-2024-08-06" even when a
   * different model is configured — token counts may drift; confirm acceptable.
   */
  _calculateMaxTokens(prompt: string, totalTokenLimit: number = 16384) {
    const tokenizer = encodingForModel("gpt-4o-2024-08-06");
    const inputTokens = tokenizer.encode(prompt).length;
    return Math.min(inputTokens, totalTokenLimit);
  }

  /**
   * Builds a placeholder {id: 0.5} response used only to estimate the size of
   * the JSON the model is expected to return.
   */
  _generateDummyResponse(comments: { id: number; comment: string }[]): Record<string, number> {
    // Object.fromEntries avoids the O(n^2) spread-accumulator reduce pattern.
    return Object.fromEntries(comments.map((comment) => [comment.id, 0.5]));
  }

  /**
   * Partitions comments needing AI evaluation into pull-request comments (which
   * carry a diff hunk) and all other comments; fixed-relevance types are excluded.
   */
  _splitCommentsByPrompt(commentsWithScore: Readonly<GithubCommentScore>[]): {
    commentsToEvaluate: CommentToEvaluate[];
    prCommentsToEvaluate: PrCommentToEvaluate[];
  } {
    const commentsToEvaluate: CommentToEvaluate[] = [];
    const prCommentsToEvaluate: PrCommentToEvaluate[] = [];
    for (const currentComment of commentsWithScore) {
      // Fix: explicit undefined check so a fixed relevance of 0 still bypasses
      // AI evaluation (truthiness would wrongly send it to the model).
      if (this._fixedRelevances[currentComment.type] === undefined) {
        if (currentComment.type & CommentKind.PULL) {
          prCommentsToEvaluate.push({
            id: currentComment.id,
            comment: currentComment.content,
            diffHunk: currentComment?.diffHunk,
          });
        } else {
          commentsToEvaluate.push({
            id: currentComment.id,
            comment: currentComment.content,
          });
        }
      }
    }
    return { commentsToEvaluate, prCommentsToEvaluate };
  }

  /**
   * Runs the issue-comment and PR-comment prompts (each only when it has
   * comments) and merges both relevance maps.
   */
  async _evaluateComments(
    specification: string,
    comments: CommentToEvaluate[],
    allComments: AllComments,
    prComments: PrCommentToEvaluate[]
  ): Promise<Relevances> {
    let commentRelevances: Relevances = {};
    let prCommentRelevances: Relevances = {};
    if (comments.length) {
      // Size max_tokens from a dummy response with the same shape as the expected output.
      const dummyResponse = JSON.stringify(this._generateDummyResponse(comments), null, 2);
      const maxTokens = this._calculateMaxTokens(dummyResponse);
      const promptForComments = this._generatePromptForComments(specification, comments, allComments);
      commentRelevances = await this._submitPrompt(promptForComments, maxTokens);
    }
    if (prComments.length) {
      const dummyResponse = JSON.stringify(this._generateDummyResponse(prComments), null, 2);
      const maxTokens = this._calculateMaxTokens(dummyResponse);
      const promptForPrComments = this._generatePromptForPrComments(specification, prComments);
      prCommentRelevances = await this._submitPrompt(promptForPrComments, maxTokens);
    }
    return { ...commentRelevances, ...prCommentRelevances };
  }

  /**
   * Sends a prompt to the model and decodes the JSON relevance map it returns.
   *
   * @throws a logged Error when the response is not valid JSON or does not
   *         match the expected relevance schema.
   */
  async _submitPrompt(prompt: string, maxTokens: number): Promise<Relevances> {
    const response: OpenAI.Chat.ChatCompletion = await this._openAi.chat.completions.create({
      model: this._configuration?.openAi.model ?? "gpt-4o-2024-08-06",
      response_format: { type: "json_object" },
      messages: [
        {
          role: "system",
          content: prompt,
        },
      ],
      max_tokens: maxTokens,
      top_p: 1,
      temperature: 1,
      frequency_penalty: 0,
      presence_penalty: 0,
    });
    const rawResponse = String(response.choices[0].message.content);
    this.context.logger.info(`OpenAI raw response (using max_tokens: ${maxTokens}): ${rawResponse}`);
    try {
      // Fix: parse inside the try block so malformed (non-JSON) responses are
      // reported through the structured error below instead of as a raw SyntaxError.
      const jsonResponse = JSON.parse(rawResponse);
      const relevances = Value.Decode(openAiRelevanceResponseSchema, jsonResponse);
      this.context.logger.info(`Relevances by OpenAI: ${JSON.stringify(relevances)}`);
      return relevances;
    } catch (e) {
      throw new Error(
        this.context.logger.error(`Invalid response type received from openai while evaluating: \n\nError: ${e}`, {
          error: e as Error,
          rawResponse,
        }).logMessage.raw
      );
    }
  }

  /**
   * Builds the prompt for evaluating issue comments against the specification.
   *
   * @throws when the issue specification is missing or empty.
   */
  _generatePromptForComments(issue: string, comments: CommentToEvaluate[], allComments: AllComments) {
    if (!issue?.length) {
      throw new Error("Issue specification comment is missing or empty");
    }
    const allCommentsMap = allComments.map((value) => `${value.id} - ${value.author}: "${value.comment}"`);
    const commentsMap = comments.map((value) => `${value.id}: "${value.comment}"`);
    return `
Evaluate the relevance of GitHub comments to an issue. Provide a JSON object with comment IDs and their relevance scores.
Issue: ${issue}
All comments:
${allCommentsMap.join("\n")}
Comments to evaluate:
${commentsMap.join("\n")}
Instructions:
1. Read all comments carefully, considering their context and content.
2. Evaluate each comment in the "Comments to evaluate" section.
3. Assign a relevance score from 0 to 1 for each comment:
- 0: Not related (e.g., spam)
- 1: Highly relevant (e.g., solutions, bug reports)
4. Consider:
- Relation to the issue description
- Connection to other comments
- Contribution to issue resolution
5. Handle GitHub-flavored markdown:
- Ignore text beginning with '>' as it references another comment
- Distinguish between referenced text and the commenter's own words
- Only evaluate the relevance of the commenter's original content
6. Return only a JSON object: {ID: score}
Notes:
- Even minor details may be significant.
- Comments may reference earlier comments.
- The number of entries in the JSON response must equal ${commentsMap.length}.
`;
  }

  /**
   * Builds the prompt for evaluating pull-request comments (including code
   * review comments with their diff hunks) against the issue specification.
   *
   * @throws when the issue specification is missing or empty.
   */
  _generatePromptForPrComments(issue: string, comments: PrCommentToEvaluate[]) {
    if (!issue?.length) {
      throw new Error("Issue specification comment is missing or empty");
    }
    return `I need to evaluate the value of a GitHub contributor's comments in a pull request. Some of these comments are code review comments, and some are general suggestions or a part of the discussion. I'm interested in how much each comment helps to solve the GitHub issue and improve code quality. Please provide a float between 0 and 1 to represent the value of each comment. A score of 1 indicates that the comment is very valuable and significantly improves the submitted solution and code quality, whereas a score of 0 indicates a negative or zero impact. A stringified JSON is given below that contains the specification of the GitHub issue, and comments by different contributors. The property "diffHunk" presents the chunk of code being addressed for a possible change in a code review comment. \n\n\`\`\`\n${JSON.stringify(
      { specification: issue, comments: comments }
    )}\n\`\`\`\n\n\nTo what degree are each of the comments valuable? Please reply with ONLY a JSON where each key is the comment ID given in JSON above, and the value is a float number between 0 and 1 corresponding to the comment. The float number should represent the value of the comment for improving the issue solution and code quality. The total number of properties in your JSON response should equal exactly ${
      comments.length
    }.`;
  }
}