Skip to content

Commit

Permalink
contribute a prompt benchmarking script (#292)
Browse files Browse the repository at this point in the history
- contribute a prompt benchmarking script

This contributes a prompt benchmarking script; it gets
label information for ~100 existing issues - assumed to be correctly
labelled - and compares them against the labels predicted by the prompt.
This will allow us to benchmark prompt changes to know whether we're
improving things or not. Note that there are no prompt improvements in
this PR (just some updates for area label changes).

The benchmarking can be re-run via `dart tool/bench.dart`.

---

- [x] I’ve reviewed the contributor guide and applied the relevant
portions to this PR.

<details>
  <summary>Contribution guidelines:</summary><br>

- See our [contributor
guide](https://github.com/dart-lang/.github/blob/main/CONTRIBUTING.md)
for general expectations for PRs.
- Larger or significant changes should be discussed in an issue before
creating a PR.
- Contributions to our repos should follow the [Dart style
guide](https://dart.dev/guides/language/effective-dart) and use `dart
format`.
- Most changes should add an entry to the changelog and may need to [rev
the pubspec package
version](https://github.com/dart-lang/sdk/blob/main/docs/External-Package-Maintenance.md#making-a-change).
- Changes to packages require [corresponding
tests](https://github.com/dart-lang/.github/blob/main/CONTRIBUTING.md#Testing).

Note that many Dart repos have a weekly cadence for reviewing PRs -
please allow for some latency before initial review feedback.
</details>
  • Loading branch information
devoncarew authored Aug 28, 2024
1 parent f7191b7 commit 183fdd0
Show file tree
Hide file tree
Showing 9 changed files with 347 additions and 65 deletions.
4 changes: 2 additions & 2 deletions pkgs/sdk_triage_bot/bin/triage.dart
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ void main(List<String> arguments) async {

var issue = results.rest.first;
final dryRun = results.flag('dry-run');
final force = results.flag('force');
final forceTriage = results.flag('force');

// Accept either an issue number or a url (i.e.,
// https://github.com/dart-lang/sdk/issues/55816).
Expand All @@ -69,7 +69,7 @@ void main(List<String> arguments) async {
await triage(
int.parse(issue),
dryRun: dryRun,
force: force,
forceTriage: forceTriage,
githubService: githubService,
geminiService: geminiService,
logger: Logger(),
Expand Down
12 changes: 8 additions & 4 deletions pkgs/sdk_triage_bot/lib/src/gemini.dart
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,26 @@ import 'package:google_generative_ai/google_generative_ai.dart';
import 'package:http/http.dart' as http;

class GeminiService {
// gemini-1.5-pro-latest, gemini-1.5-flash-latest, gemini-1.0-pro-latest
static const String classificationModel = 'models/gemini-1.5-flash-latest';
static const String summarizationModel = 'models/gemini-1.5-flash-latest';

final GenerativeModel _summarizeModel;
final GenerativeModel _classifyModel;

GeminiService({
required String apiKey,
required http.Client httpClient,
}) : _summarizeModel = GenerativeModel(
model: 'models/gemini-1.5-flash-latest',
model: summarizationModel,
apiKey: apiKey,
generationConfig: GenerationConfig(temperature: 0.2),
httpClient: httpClient,
),
_classifyModel = GenerativeModel(
// TODO(devconcarew): substitute our tuned model
// TODO(devoncarew): substitute our tuned model
// model: 'tunedModels/autotune-sdk-triage-tuned-prompt-1l96e2n',
model: 'models/gemini-1.5-flash-latest',
model: classificationModel,
apiKey: apiKey,
generationConfig: GenerationConfig(temperature: 0.2),
httpClient: httpClient,
Expand All @@ -45,6 +49,6 @@ class GeminiService {

Future<String> _query(GenerativeModel model, String prompt) async {
final response = await model.generateContent([Content.text(prompt)]);
return response.text!.trim();
return (response.text ?? '').trim();
}
}
79 changes: 51 additions & 28 deletions pkgs/sdk_triage_bot/lib/src/github.dart
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,15 @@ class GithubService {

Future<FetchIssuesResult> fetchIssues(
String areaLabel, {
required bool includeClosed,
String? cursor,
}) async {
final result = await _query(QueryOptions(
document: gql(_buildQueryString(areaLabel, cursor: cursor)),
document: gql(_buildQueryString(
areaLabel,
cursor: cursor,
includeClosed: includeClosed,
)),
fetchPolicy: FetchPolicy.noCache,
parserFn: (data) {
final search = data['search'] as Map<String, dynamic>;
Expand Down Expand Up @@ -104,41 +109,46 @@ Future<QueryResult<T>> _query<T>(QueryOptions<T> options) {
return _client.query<T>(options);
}

String _buildQueryString(String areaLabel, {String? cursor}) {
final cursorRef = cursor == null ? null : '"$cursor"';
String _buildQueryString(
String areaLabel, {
required bool includeClosed,
String? cursor,
}) {
final cursorTerm = cursor == null ? '' : 'after: "$cursor"';
final isOpen = includeClosed ? '' : 'is:open';

return '''{
search(
query: "repo:dart-lang/sdk is:issue is:open label:$areaLabel"
type: ISSUE
first: 100,
after: $cursorRef
) {
edges {
node {
... on Issue {
title
number
state
bodyText
labels(first: 10) {
edges {
node {
name
search(
query: "repo:dart-lang/sdk is:issue $isOpen label:$areaLabel"
type: ISSUE
first: 100
$cursorTerm
) {
edges {
node {
... on Issue {
title
number
state
bodyText
labels(first: 10) {
edges {
node {
name
}
}
}
}
}
}
pageInfo {
endCursor
startCursor
hasNextPage
hasPreviousPage
}
}
pageInfo {
endCursor
startCursor
hasNextPage
hasPreviousPage
}
}
}''';
}''';
}

final GraphQLClient _client = _initGraphQLClient();
Expand All @@ -158,4 +168,17 @@ extension IssueExtension on Issue {
///
/// Note that the original text for the issue is returned in the `body` field.
bool get hasComments => commentsCount > 0;

/// Whether this issue has already been triaged.
///
/// An issue counts as triaged when it is closed, or when it carries either
/// the `needs-info` label or any label whose name starts with `area-`.
bool get alreadyTriaged =>
    isClosed ||
    labels.any((label) {
      final labelName = label.name;
      return labelName.startsWith('area-') || labelName == 'needs-info';
    });
}
3 changes: 2 additions & 1 deletion pkgs/sdk_triage_bot/lib/src/prompts.dart
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,11 @@ area-infrastructure: Use area-infrastructure for SDK infrastructure issues, like
area-intellij: Tracking issues for the Dart IntelliJ plugin.
area-language: Dart language related items (some items might be better tracked at github.com/dart-lang/language).
area-meta: Cross-cutting, high-level issues (for tracking many other implementation issues, ...).
area-native-interop: Used for native interop related issues, including FFI.
area-pkg: Used for miscellaneous pkg/ packages not associated with specific area- teams.
area-sdk: Use area-sdk for general purpose SDK issues (packaging, distribution, …).
area-test: Cross-cutting test issues (use area- labels for specific failures; not used for package:test).
area-vm: Use area-vm for VM related issues, including code coverage, FFI, and the AOT and JIT backends.
area-vm: Use area-vm for VM related issues, including code coverage, and the AOT and JIT backends.
area-web: Use area-web for Dart web related issues, including the DDC and dart2js compilers and JS interop.
Don't make up a new area.
Expand Down
36 changes: 18 additions & 18 deletions pkgs/sdk_triage_bot/lib/triage.dart
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ final sdkSlug = RepositorySlug('dart-lang', 'sdk');
Future<void> triage(
int issueNumber, {
bool dryRun = false,
bool force = false,
bool forceTriage = false,
required GithubService githubService,
required GeminiService geminiService,
required Logger logger,
Expand Down Expand Up @@ -63,21 +63,22 @@ ${trimmedBody(comment.body ?? '')}
}

// decide if we should triage
final alreadyTriaged = labels.any((l) => l.startsWith('area-'));
if (alreadyTriaged && !force) {
logger.log('Exiting (issue is already triaged).');
return;
if (!forceTriage) {
if (issue.alreadyTriaged) {
logger.log('Exiting (issue is already triaged).');
return;
}
}

// ask for the summary
var bodyTrimmed = trimmedBody(issue.body);
String summary;
try {
// Failures here can include things like gemini safety issues, ...
summary = await geminiService.summarize(
summarizeIssuePrompt(title: issue.title, body: bodyTrimmed),
);
} on GenerativeAIException catch (e) {
// Failures here can include things like gemini safety issues, ...
stderr.writeln('gemini: $e');
exit(1);
}
Expand All @@ -88,21 +89,21 @@ ${trimmedBody(comment.body ?? '')}
logger.log('');

// ask for the 'area-' classification
List<String> classification;
List<String> newLabels;
try {
// Failures here can include things like gemini safety issues, ...
classification = await geminiService.classify(
newLabels = await geminiService.classify(
assignAreaPrompt(
title: issue.title, body: bodyTrimmed, lastComment: lastComment),
);
} on GenerativeAIException catch (e) {
// Failures here can include things like gemini safety issues, ...
stderr.writeln('gemini: $e');
exit(1);
}

logger.log('## gemini classification');
logger.log('');
logger.log(classification.toString());
logger.log(newLabels.toString());
logger.log('');

if (dryRun) {
Expand All @@ -113,7 +114,7 @@ ${trimmedBody(comment.body ?? '')}
// perform changes
logger.log('## github comment');
logger.log('');
logger.log('labels: $classification');
logger.log('labels: $newLabels');
logger.log('');
logger.log(summary);

Expand All @@ -122,17 +123,16 @@ ${trimmedBody(comment.body ?? '')}
// create github comment
await githubService.createComment(sdkSlug, issueNumber, comment);

final allLabels = await githubService.getAllLabels(sdkSlug);
var newLabels = filterExistingLabels(allLabels, classification);
if (newLabels.any((l) => l.startsWith('area-'))) {
newLabels.add('triage-automation');
// Only apply predicted labels that actually exist in the repo: intersect the
// model's output with the repo's label set. (A union here would attach every
// label defined in the repo to the issue.)
final allRepoLabels = (await githubService.getAllLabels(sdkSlug)).toSet();
final labelAdditions = newLabels.toSet().intersection(allRepoLabels).toList()
  ..sort();
if (labelAdditions.isNotEmpty) {
  // Whenever at least one label is being added, also add the
  // `triage-automation` label.
  labelAdditions.add('triage-automation');
}
// remove any duplicates
newLabels = newLabels.toSet().toList();

// apply github labels
if (newLabels.isNotEmpty) {
await githubService.addLabelsToIssue(sdkSlug, issueNumber, newLabels);
await githubService.addLabelsToIssue(sdkSlug, issueNumber, labelAdditions);
}

logger.log('');
Expand Down
2 changes: 1 addition & 1 deletion pkgs/sdk_triage_bot/test/triage_test.dart
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ void main() {

await triage(
mockIssueNumber,
force: true,
forceTriage: true,
githubService: githubService,
geminiService: geminiService,
logger: TestLogger(),
Expand Down
Loading

0 comments on commit 183fdd0

Please sign in to comment.