Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

committable comments #1369

Merged
merged 3 commits into from
Nov 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 28 additions & 13 deletions pr_agent/git_providers/bitbucket_provider.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import difflib
import json
import re
from typing import Optional, Tuple
from urllib.parse import urlparse

Expand Down Expand Up @@ -72,24 +74,38 @@ def publish_code_suggestions(self, code_suggestions: list) -> bool:
post_parameters_list = []
for suggestion in code_suggestions:
body = suggestion["body"]
original_suggestion = suggestion.get('original_suggestion', None) # needed for diff code
if original_suggestion:
try:
existing_code = original_suggestion['existing_code'].rstrip() + "\n"
improved_code = original_suggestion['improved_code'].rstrip() + "\n"
diff = difflib.unified_diff(existing_code.split('\n'),
improved_code.split('\n'), n=999)
patch_orig = "\n".join(diff)
patch = "\n".join(patch_orig.splitlines()[5:]).strip('\n')
diff_code = f"\n\n```diff\n{patch.rstrip()}\n```"
# replace ```suggestion ... ``` with diff_code, using regex:
body = re.sub(r'```suggestion.*?```', diff_code, body, flags=re.DOTALL)
except Exception as e:
get_logger().exception(f"Bitbucket failed to get diff code for publishing, error: {e}")
continue

relevant_file = suggestion["relevant_file"]
relevant_lines_start = suggestion["relevant_lines_start"]
relevant_lines_end = suggestion["relevant_lines_end"]

if not relevant_lines_start or relevant_lines_start == -1:
if get_settings().config.verbosity_level >= 2:
get_logger().exception(
f"Failed to publish code suggestion, relevant_lines_start is {relevant_lines_start}"
)
get_logger().exception(
f"Failed to publish code suggestion, relevant_lines_start is {relevant_lines_start}"
)
continue

if relevant_lines_end < relevant_lines_start:
if get_settings().config.verbosity_level >= 2:
get_logger().exception(
f"Failed to publish code suggestion, "
f"relevant_lines_end is {relevant_lines_end} and "
f"relevant_lines_start is {relevant_lines_start}"
)
get_logger().exception(
f"Failed to publish code suggestion, "
f"relevant_lines_end is {relevant_lines_end} and "
f"relevant_lines_start is {relevant_lines_start}"
)
continue

if relevant_lines_end > relevant_lines_start:
Expand All @@ -113,16 +129,15 @@ def publish_code_suggestions(self, code_suggestions: list) -> bool:
self.publish_inline_comments(post_parameters_list)
return True
except Exception as e:
if get_settings().config.verbosity_level >= 2:
get_logger().error(f"Failed to publish code suggestion, error: {e}")
get_logger().error(f"Bitbucket failed to publish code suggestion, error: {e}")
return False

def publish_file_comments(self, file_comments: list) -> bool:
pass

def is_supported(self, capability: str) -> bool:
if capability in ['get_issue_comments', 'publish_inline_comments', 'get_labels', 'gfm_markdown',
'publish_file_comments']:
'publish_file_comments']:
return False
return True

Expand Down
36 changes: 26 additions & 10 deletions pr_agent/git_providers/bitbucket_server_provider.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import difflib
import re

from packaging.version import parse as parse_version
from typing import Optional, Tuple
from urllib.parse import quote_plus, urlparse
Expand Down Expand Up @@ -67,24 +70,37 @@ def publish_code_suggestions(self, code_suggestions: list) -> bool:
post_parameters_list = []
for suggestion in code_suggestions:
body = suggestion["body"]
original_suggestion = suggestion.get('original_suggestion', None) # needed for diff code
if original_suggestion:
try:
existing_code = original_suggestion['existing_code'].rstrip() + "\n"
improved_code = original_suggestion['improved_code'].rstrip() + "\n"
diff = difflib.unified_diff(existing_code.split('\n'),
improved_code.split('\n'), n=999)
patch_orig = "\n".join(diff)
patch = "\n".join(patch_orig.splitlines()[5:]).strip('\n')
diff_code = f"\n\n```diff\n{patch.rstrip()}\n```"
# replace ```suggestion ... ``` with diff_code, using regex:
body = re.sub(r'```suggestion.*?```', diff_code, body, flags=re.DOTALL)
except Exception as e:
get_logger().exception(f"Bitbucket failed to get diff code for publishing, error: {e}")
continue
relevant_file = suggestion["relevant_file"]
relevant_lines_start = suggestion["relevant_lines_start"]
relevant_lines_end = suggestion["relevant_lines_end"]

if not relevant_lines_start or relevant_lines_start == -1:
if get_settings().config.verbosity_level >= 2:
get_logger().exception(
f"Failed to publish code suggestion, relevant_lines_start is {relevant_lines_start}"
)
get_logger().warning(
f"Failed to publish code suggestion, relevant_lines_start is {relevant_lines_start}"
)
continue

if relevant_lines_end < relevant_lines_start:
if get_settings().config.verbosity_level >= 2:
get_logger().exception(
f"Failed to publish code suggestion, "
f"relevant_lines_end is {relevant_lines_end} and "
f"relevant_lines_start is {relevant_lines_start}"
)
get_logger().warning(
f"Failed to publish code suggestion, "
f"relevant_lines_end is {relevant_lines_end} and "
f"relevant_lines_start is {relevant_lines_start}"
)
continue

if relevant_lines_end > relevant_lines_start:
Expand Down
106 changes: 105 additions & 1 deletion pr_agent/git_providers/github_provider.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import copy
import difflib
import hashlib
import itertools
import re
import time
import traceback
from datetime import datetime
Expand All @@ -11,6 +14,7 @@
from starlette_context import context

from ..algo.file_filter import filter_ignored
from ..algo.git_patch_processing import extract_hunk_headers
from ..algo.language_handler import is_valid_file
from ..algo.types import EDIT_TYPE
from ..algo.utils import (PRReviewHeader, Range, clip_tokens,
Expand Down Expand Up @@ -415,7 +419,10 @@ def publish_code_suggestions(self, code_suggestions: list) -> bool:
Publishes code suggestions as comments on the PR.
"""
post_parameters_list = []
for suggestion in code_suggestions:

code_suggestions_validated = self.validate_comments_inside_hunks(code_suggestions)

for suggestion in code_suggestions_validated:
body = suggestion['body']
relevant_file = suggestion['relevant_file']
relevant_lines_start = suggestion['relevant_lines_start']
Expand Down Expand Up @@ -872,3 +879,100 @@ def auto_approve(self) -> bool:

def calc_pr_statistics(self, pull_request_data: dict):
return {}

def validate_comments_inside_hunks(self, code_suggestions):
"""
validate that all committable comments are inside PR hunks - this is a must for committable comments in GitHub
"""
code_suggestions_copy = copy.deepcopy(code_suggestions)
diff_files = self.get_diff_files()
RE_HUNK_HEADER = re.compile(
r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")

# map file extensions to programming languages
language_extension_map_org = get_settings().language_extension_map_org
extension_to_language = {}
for language, extensions in language_extension_map_org.items():
for ext in extensions:
extension_to_language[ext] = language
for file in diff_files:
extension_s = '.' + file.filename.rsplit('.')[-1]
language_name = "txt"
if extension_s and (extension_s in extension_to_language):
language_name = extension_to_language[extension_s]
file.language = language_name.lower()

for suggestion in code_suggestions_copy:
try:
relevant_file_path = suggestion['relevant_file']
for file in diff_files:
if file.filename == relevant_file_path:

# generate on-demand the patches range for the relevant file
patch_str = file.patch
if not hasattr(file, 'patches_range'):
file.patches_range = []
patch_lines = patch_str.splitlines()
for i, line in enumerate(patch_lines):
if line.startswith('@@'):
match = RE_HUNK_HEADER.match(line)
# identify hunk header
if match:
section_header, size1, size2, start1, start2 = extract_hunk_headers(match)
file.patches_range.append({'start': start2, 'end': start2 + size2 - 1})

patches_range = file.patches_range
comment_start_line = suggestion.get('relevant_lines_start', None)
comment_end_line = suggestion.get('relevant_lines_end', None)
original_suggestion = suggestion.get('original_suggestion', None) # needed for diff code
if not comment_start_line or not comment_end_line or not original_suggestion:
continue

# check if the comment is inside a valid hunk
is_valid_hunk = False
min_distance = float('inf')
patch_range_min = None
# find the hunk that contains the comment, or the closest one
for i, patch_range in enumerate(patches_range):
d1 = comment_start_line - patch_range['start']
d2 = patch_range['end'] - comment_end_line
if d1 >= 0 and d2 >= 0: # found a valid hunk
is_valid_hunk = True
min_distance = 0
patch_range_min = patch_range
break
elif d1 * d2 <= 0: # comment is possibly inside the hunk
d1_clip = abs(min(0, d1))
d2_clip = abs(min(0, d2))
d = max(d1_clip, d2_clip)
if d < min_distance:
patch_range_min = patch_range
min_distance = min(min_distance, d)
if not is_valid_hunk:
if min_distance < 10: # 10 lines - a reasonable distance to consider the comment inside the hunk
# make the suggestion non-committable, yet multi line
suggestion['relevant_lines_start'] = max(suggestion['relevant_lines_start'], patch_range_min['start'])
suggestion['relevant_lines_end'] = min(suggestion['relevant_lines_end'], patch_range_min['end'])
body = suggestion['body'].strip()

# present new diff code in collapsible
existing_code = original_suggestion['existing_code'].rstrip() + "\n"
improved_code = original_suggestion['improved_code'].rstrip() + "\n"
diff = difflib.unified_diff(existing_code.split('\n'),
improved_code.split('\n'), n=999)
patch_orig = "\n".join(diff)
patch = "\n".join(patch_orig.splitlines()[5:]).strip('\n')
diff_code = f"\n\n<details><summary>New proposed code:</summary>\n\n```diff\n{patch.rstrip()}\n```"
# replace ```suggestion ... ``` with diff_code, using regex:
body = re.sub(r'```suggestion.*?```', diff_code, body, flags=re.DOTALL)
body += "\n\n</details>"
suggestion['body'] = body
get_logger().info(f"Comment was moved to a valid hunk, "
f"start_line={suggestion['relevant_lines_start']}, end_line={suggestion['relevant_lines_end']}, file={file.filename}")
else:
get_logger().error(f"Comment is not inside a valid hunk, "
f"start_line={suggestion['relevant_lines_start']}, end_line={suggestion['relevant_lines_end']}, file={file.filename}")
except Exception as e:
get_logger().error(f"Failed to process patch for committable comment, error: {e}")
return code_suggestions_copy

21 changes: 13 additions & 8 deletions pr_agent/git_providers/gitlab_provider.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import difflib
import hashlib
import re
from typing import Optional, Tuple
Expand Down Expand Up @@ -278,20 +279,23 @@ def send_inline_comment(self, body: str, edit_type: str, found: bool, relevant_f
new_code_snippet = original_suggestion['improved_code']
content = original_suggestion['suggestion_content']
label = original_suggestion['label']
if 'score' in original_suggestion:
score = original_suggestion['score']
else:
score = 7
score = original_suggestion.get('score', 7)

if hasattr(self, 'main_language'):
language = self.main_language
else:
language = ''
link = self.get_line_link(relevant_file, line_start, line_end)
body_fallback =f"**Suggestion:** {content} [{label}, importance: {score}]\n___\n"
body_fallback +=f"\n\nReplace lines ([{line_start}-{line_end}]({link}))\n\n```{language}\n{old_code_snippet}\n````\n\n"
body_fallback +=f"with\n\n```{language}\n{new_code_snippet}\n````"
body_fallback += f"\n\n___\n\n`(Cannot implement this suggestion directly, as gitlab API does not enable committing to a non -+ line in a PR)`"
body_fallback =f"**Suggestion:** {content} [{label}, importance: {score}]\n\n"
body_fallback +=f"\n\n<details><summary>[{target_file.filename} [{line_start}-{line_end}]]({link}):</summary>\n\n"
body_fallback += f"\n\n___\n\n`(Cannot implement directly - GitLab API allows committable suggestions strictly on MR diff lines)`"
body_fallback+="</details>\n\n"
diff_patch = difflib.unified_diff(old_code_snippet.split('\n'),
new_code_snippet.split('\n'), n=999)
patch_orig = "\n".join(diff_patch)
patch = "\n".join(patch_orig.splitlines()[5:]).strip('\n')
diff_code = f"\n\n```diff\n{patch.rstrip()}\n```"
body_fallback += diff_code

# Create a general note on the file in the MR
self.mr.notes.create({
Expand All @@ -304,6 +308,7 @@ def send_inline_comment(self, body: str, edit_type: str, found: bool, relevant_f
'file_path': f'{target_file.filename}',
}
})
get_logger().debug(f"Created fallback comment in MR {self.id_mr} with position {pos_obj}")

# get_logger().debug(
# f"Failed to create comment in MR {self.id_mr} with position {pos_obj} (probably not a '+' line)")
Expand Down
Loading