diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml
index b464b1e92..9655c95db 100644
--- a/pr_agent/settings/configuration.toml
+++ b/pr_agent/settings/configuration.toml
@@ -183,6 +183,7 @@ enable_help_text=true
 final_update_message = false
 
 [pr_help] # /help #
+force_local_db=false
 
 [pr_config] # /config #
diff --git a/pr_agent/tools/pr_help_message.py b/pr_agent/tools/pr_help_message.py
index 720b470dc..5b86e6d64 100644
--- a/pr_agent/tools/pr_help_message.py
+++ b/pr_agent/tools/pr_help_message.py
@@ -1,106 +1,350 @@
+import os
+import traceback
+import zipfile
+import tempfile
+import copy
+from functools import partial
+
+from jinja2 import Environment, StrictUndefined
+
+from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
+from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler
+from pr_agent.algo.pr_processing import retry_with_fallback_models
+from pr_agent.algo.token_handler import TokenHandler
+from pr_agent.algo.utils import ModelType, load_yaml
 from pr_agent.config_loader import get_settings
-from pr_agent.git_providers import get_git_provider, GithubProvider
+from pr_agent.git_providers import get_git_provider, GithubProvider, BitbucketServerProvider, \
+    get_git_provider_with_context
 from pr_agent.log import get_logger
 
 
+def extract_header(snippet):
+    res = ''
+    lines = snippet.split('===Snippet content===')[0].split('\n')
+    highest_header = ''
+    highest_level = float('inf')
+    for line in lines[::-1]:
+        line = line.strip()
+        if line.startswith('Header '):
+            highest_header = line.split(': ')[1]
+    if highest_header:
+        res = f"#{highest_header.lower().replace(' ', '-')}"
+    return res
+
+
 class PRHelpMessage:
-    def __init__(self, pr_url: str, args=None, ai_handler=None):
-        self.git_provider = get_git_provider()(pr_url)
+    def __init__(self, pr_url: str, args=None, ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler):
+        self.git_provider = get_git_provider_with_context(pr_url)
+        self.ai_handler = ai_handler()
+        self.question_str = self.parse_args(args)
+        if self.question_str:
+            self.vars = {
+                "question": self.question_str,
+                "snippets": "",
+            }
+            self.token_handler = TokenHandler(None,
+                                              self.vars,
+                                              get_settings().pr_help_prompts.system,
+                                              get_settings().pr_help_prompts.user)
+
+    async def _prepare_prediction(self, model: str):
+        try:
+            variables = copy.deepcopy(self.vars)
+            environment = Environment(undefined=StrictUndefined)
+            system_prompt = environment.from_string(get_settings().pr_help_prompts.system).render(variables)
+            user_prompt = environment.from_string(get_settings().pr_help_prompts.user).render(variables)
+            response, finish_reason = await self.ai_handler.chat_completion(
+                model=model, temperature=get_settings().config.temperature, system=system_prompt, user=user_prompt)
+            return response
+        except Exception as e:
+            get_logger().error(f"Error while preparing prediction: {e}")
+            return ""
+
+    def parse_args(self, args):
+        if args and len(args) > 0:
+            question_str = " ".join(args)
+        else:
+            question_str = ""
+        return question_str
+
+    def get_sim_results_from_s3_db(self, embeddings):
+        get_logger().info("Loading the S3 index...")
+        sim_results = []
+        try:
+            from langchain_chroma import Chroma
+            import boto3
+            with tempfile.TemporaryDirectory() as temp_dir:
+                # Define the local file path within the temporary directory
+                local_file_path = os.path.join(temp_dir, 'chroma_db.zip')
+
+                # Initialize the S3 client
+                s3 = boto3.client('s3')
+
+                # Download the file from S3 to the temporary directory
+                bucket = 'pr-agent'
+                file_name = 'chroma_db.zip'
+                s3.download_file(bucket, file_name, local_file_path)
+
+                # Extract the contents of the zip file
+                with zipfile.ZipFile(local_file_path, 'r') as zip_ref:
+                    zip_ref.extractall(temp_dir)
+
+                vectorstore = Chroma(persist_directory=temp_dir + "/chroma_db",
+                                     embedding_function=embeddings)
+                sim_results = vectorstore.similarity_search_with_score(self.question_str, k=4)
+        except Exception as e:
+            get_logger().error(f"Error while getting sim from S3: {e}",
+                               artifact={"traceback": traceback.format_exc()})
+        return sim_results
+
+    def get_sim_results_from_local_db(self, embeddings):
+        get_logger().info("Loading the local index...")
+        sim_results = []
+        try:
+            from langchain_chroma import Chroma
+            get_logger().info("Loading the Chroma index...")
+            with tempfile.TemporaryDirectory() as temp_dir:
+                db_path = "./docs/chroma_db.zip"
+
+                # Extract the ZIP file
+                with zipfile.ZipFile(db_path, 'r') as zip_ref:
+                    zip_ref.extractall(temp_dir)
+
+                vectorstore = Chroma(persist_directory=temp_dir + "/chroma_db",
+                                     embedding_function=embeddings)
+
+                # Do similarity search
+                sim_results = vectorstore.similarity_search_with_score(self.question_str, k=4)
+        except Exception as e:
+            get_logger().error(f"Error while getting sim from local db: {e}",
+                               artifact={"traceback": traceback.format_exc()})
+        return sim_results
+
+    def get_sim_results_from_pinecone_db(self, embeddings):
+        get_logger().info("Loading the Pinecone index...")
+        sim_results = []
+        try:
+            from langchain_pinecone import PineconeVectorStore
+            INDEX_NAME = "pr-agent-docs"
+            vectorstore = PineconeVectorStore(
+                index_name=INDEX_NAME, embedding=embeddings,
+                pinecone_api_key=get_settings().pinecone.api_key
+            )
+
+            # Do similarity search
+            sim_results = vectorstore.similarity_search_with_score(self.question_str, k=4)
+        except Exception as e:
+            get_logger().error(f"Error while getting sim from Pinecone db: {e}",
+                               artifact={"traceback": traceback.format_exc()})
+        return sim_results
 
     async def run(self):
         try:
-            if not self.git_provider.is_supported("gfm_markdown"):
-                self.git_provider.publish_comment(
-                    "The `Help` tool requires gfm markdown, which is not supported by your code platform.")
-                return
-
-            get_logger().info('Getting PR Help Message...')
-            relevant_configs = {'pr_help': dict(get_settings().pr_help),
-                                'config': dict(get_settings().config)}
-            get_logger().debug("Relevant configs", artifacts=relevant_configs)
-            pr_comment = "## PR Agent Walkthrough 🤖\n\n"
-            pr_comment += "Welcome to the PR Agent, an AI-powered tool for automated pull request analysis, feedback, suggestions and more."""
-            pr_comment += "\n\nHere is a list of tools you can use to interact with the PR Agent:\n"
-            base_path = "https://pr-agent-docs.codium.ai/tools"
-
-            tool_names = []
-            tool_names.append(f"[DESCRIBE]({base_path}/describe/)")
-            tool_names.append(f"[REVIEW]({base_path}/review/)")
-            tool_names.append(f"[IMPROVE]({base_path}/improve/)")
-            tool_names.append(f"[UPDATE CHANGELOG]({base_path}/update_changelog/)")
-            tool_names.append(f"[ADD DOCS]({base_path}/documentation/) 💎")
-            tool_names.append(f"[TEST]({base_path}/test/) 💎")
-            tool_names.append(f"[IMPROVE COMPONENT]({base_path}/improve_component/) 💎")
-            tool_names.append(f"[ANALYZE]({base_path}/analyze/) 💎")
-            tool_names.append(f"[ASK]({base_path}/ask/)")
-            tool_names.append(f"[GENERATE CUSTOM LABELS]({base_path}/custom_labels/) 💎")
-            tool_names.append(f"[CI FEEDBACK]({base_path}/ci_feedback/) 💎")
-            tool_names.append(f"[CUSTOM PROMPT]({base_path}/custom_prompt/) 💎")
-            tool_names.append(f"[SIMILAR ISSUE]({base_path}/similar_issues/)")
-
-            descriptions = []
-            descriptions.append("Generates PR description - title, type, summary, code walkthrough and labels")
-            descriptions.append("Adjustable feedback about the PR, possible issues, security concerns, review effort and more")
-            descriptions.append("Code suggestions for improving the PR")
-            descriptions.append("Automatically updates the changelog")
-            descriptions.append("Generates documentation to methods/functions/classes that changed in the PR")
-            descriptions.append("Generates unit tests for a specific component, based on the PR code change")
-            descriptions.append("Code suggestions for a specific component that changed in the PR")
-            descriptions.append("Identifies code components that changed in the PR, and enables to interactively generate tests, docs, and code suggestions for each component")
-            descriptions.append("Answering free-text questions about the PR")
-            descriptions.append("Generates custom labels for the PR, based on specific guidelines defined by the user")
-            descriptions.append("Generates feedback and analysis for a failed CI job")
-            descriptions.append("Generates custom suggestions for improving the PR code, derived only from a specific guidelines prompt defined by the user")
-            descriptions.append("Automatically retrieves and presents similar issues")
-
-            commands =[]
-            commands.append("`/describe`")
-            commands.append("`/review`")
-            commands.append("`/improve`")
-            commands.append("`/update_changelog`")
-            commands.append("`/add_docs`")
-            commands.append("`/test`")
-            commands.append("`/improve_component`")
-            commands.append("`/analyze`")
-            commands.append("`/ask`")
-            commands.append("`/generate_labels`")
-            commands.append("`/checks`")
-            commands.append("`/custom_prompt`")
-            commands.append("`/similar_issue`")
-
-            checkbox_list = []
-            checkbox_list.append(" - [ ] Run <!-- /describe -->")
-            checkbox_list.append(" - [ ] Run <!-- /review -->")
-            checkbox_list.append(" - [ ] Run <!-- /improve -->")
-            checkbox_list.append(" - [ ] Run <!-- /update_changelog -->")
-            checkbox_list.append(" - [ ] Run <!-- /add_docs -->")
-            checkbox_list.append(" - [ ] Run <!-- /test -->")
-            checkbox_list.append(" - [ ] Run <!-- /improve_component -->")
-            checkbox_list.append(" - [ ] Run <!-- /analyze -->")
-            checkbox_list.append("[*]")
-            checkbox_list.append("[*]")
-            checkbox_list.append("[*]")
-            checkbox_list.append("[*]")
-            checkbox_list.append("[*]")
-            checkbox_list.append("[*]")
-            checkbox_list.append("[*]")
-            checkbox_list.append("[*]")
-
-            if isinstance(self.git_provider, GithubProvider) and not get_settings().config.get('disable_checkboxes', False):
-                pr_comment += f"<table><tr align='left'><th align='left'>Tool</th><th align='left'>Description</th><th align='left'>Trigger Interactively :gem:</th></tr>"
-                for i in range(len(tool_names)):
-                    pr_comment += f"\n<tr><td align='left'>\n\n<strong>{tool_names[i]}</strong></td>\n<td>{descriptions[i]}</td>\n<td>\n\n{checkbox_list[i]}\n</td></tr>"
-                pr_comment += "</table>\n\n"
-                pr_comment += f"""\n\n(1) Note that each tool be [triggered automatically](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#github-app-automatic-tools-when-a-new-pr-is-opened) when a new PR is opened, or called manually by [commenting on a PR](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#online-usage)."""
-                pr_comment += f"""\n\n(2) Tools marked with [*] require additional parameters to be passed. For example, to invoke the `/ask` tool, you need to comment on a PR: `/ask ""`. See the relevant documentation for each tool for more details."""
+            if self.question_str:
+                get_logger().info(f'Answering a PR question about the PR {self.git_provider.pr_url} ')
+
+                if not get_settings().openai.key:
+                    if get_settings().config.publish_output:
+                        self.git_provider.publish_comment(
+                            "The `Help` tool chat requires an OpenAI API key, which is not configured.")
+                    else:
+                        get_logger().error("The `Help` tool chat requires an OpenAI API key, which is not configured.")
+                    return
+
+                # Initialize embeddings
+                from langchain_openai import OpenAIEmbeddings
+                embeddings = OpenAIEmbeddings(model="text-embedding-ada-002",
+                                              api_key=get_settings().openai.key)
+
+                # Get similar snippets via similarity search
+                if get_settings().pr_help.force_local_db:
+                    sim_results = self.get_sim_results_from_local_db(embeddings)
+                elif get_settings().pinecone.api_key:
+                    sim_results = self.get_sim_results_from_pinecone_db(embeddings)
+                else:
+                    sim_results = self.get_sim_results_from_s3_db(embeddings)
+                    if not sim_results:
+                        get_logger().info("Failed to load the S3 index. Loading the local index...")
+                        sim_results = self.get_sim_results_from_local_db(embeddings)
+
+                # Prepare relevant snippets
+                relevant_pages_full, relevant_snippets_full_header, relevant_snippets_str =\
+                    await self.prepare_relevant_snippets(sim_results)
+                self.vars['snippets'] = relevant_snippets_str.strip()
+
+                # run the AI model
+                response = await retry_with_fallback_models(self._prepare_prediction, model_type=ModelType.REGULAR)
+                response_yaml = load_yaml(response)
+                response_str = response_yaml.get('response')
+                relevant_snippets_numbers = response_yaml.get('relevant_snippets')
+
+                # prepare the answer
+                answer_str = ""
+                if response_str:
+                    answer_str += f"### Question: \n{self.question_str}\n\n"
+                    answer_str += f"### Answer:\n{response_str.strip()}\n\n"
+                    answer_str += f"#### Relevant Sources:\n\n"
+                    paged_published = []
+                    for page in relevant_snippets_numbers:
+                        page = int(page - 1)
+                        if page < len(relevant_pages_full) and page >= 0:
+                            if relevant_pages_full[page] in paged_published:
+                                continue
+                            link = f"{relevant_pages_full[page]}{relevant_snippets_full_header[page]}"
+                            # answer_str += f"> - [{relevant_pages_full[page]}]({link})\n"
+                            answer_str += f"> - {link}\n"
+                            paged_published.append(relevant_pages_full[page])
+
+                # publish the answer
+                if get_settings().config.publish_output:
+                    self.git_provider.publish_comment(answer_str)
+                else:
+                    get_logger().info(f"Answer: {response}")
             else:
-                pr_comment += f"<table><tr align='left'><th align='left'>Tool</th><th align='left'>Command</th><th align='left'>Description</th></tr>"
-                for i in range(len(tool_names)):
-                    pr_comment += f"\n<tr><td align='left'>\n\n<strong>{tool_names[i]}</strong></td><td>{commands[i]}</td><td>{descriptions[i]}</td></tr>"
-                pr_comment += "</table>\n\n"
-                pr_comment += f"""\n\nNote that each tool be [invoked automatically](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/) when a new PR is opened, or called manually by [commenting on a PR](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#online-usage)."""
-            if get_settings().config.publish_output:
-                self.git_provider.publish_comment(pr_comment)
+                if not isinstance(self.git_provider, BitbucketServerProvider) and not self.git_provider.is_supported("gfm_markdown"):
+                    self.git_provider.publish_comment(
+                        "The `Help` tool requires gfm markdown, which is not supported by your code platform.")
+                    return
+
+                get_logger().info('Getting PR Help Message...')
+                relevant_configs = {'pr_help': dict(get_settings().pr_help),
+                                    'config': dict(get_settings().config)}
+                get_logger().debug("Relevant configs", artifacts=relevant_configs)
+                pr_comment = "## PR Agent Walkthrough 🤖\n\n"
+                pr_comment += "Welcome to the PR Agent, an AI-powered tool for automated pull request analysis, feedback, suggestions and more."""
+                pr_comment += "\n\nHere is a list of tools you can use to interact with the PR Agent:\n"
+                base_path = "https://pr-agent-docs.codium.ai/tools"
+
+                tool_names = []
+                tool_names.append(f"[DESCRIBE]({base_path}/describe/)")
+                tool_names.append(f"[REVIEW]({base_path}/review/)")
+                tool_names.append(f"[IMPROVE]({base_path}/improve/)")
+                tool_names.append(f"[UPDATE CHANGELOG]({base_path}/update_changelog/)")
+                tool_names.append(f"[ADD DOCS]({base_path}/documentation/) 💎")
+                tool_names.append(f"[TEST]({base_path}/test/) 💎")
+                tool_names.append(f"[IMPROVE COMPONENT]({base_path}/improve_component/) 💎")
+                tool_names.append(f"[ANALYZE]({base_path}/analyze/) 💎")
+                tool_names.append(f"[ASK]({base_path}/ask/)")
+                tool_names.append(f"[GENERATE CUSTOM LABELS]({base_path}/custom_labels/) 💎")
+                tool_names.append(f"[CI FEEDBACK]({base_path}/ci_feedback/) 💎")
+                tool_names.append(f"[CUSTOM PROMPT]({base_path}/custom_prompt/) 💎")
+                tool_names.append(f"[SIMILAR ISSUE]({base_path}/similar_issues/)")
+
+                descriptions = []
+                descriptions.append("Generates PR description - title, type, summary, code walkthrough and labels")
+                descriptions.append("Adjustable feedback about the PR, possible issues, security concerns, review effort and more")
+                descriptions.append("Code suggestions for improving the PR")
+                descriptions.append("Automatically updates the changelog")
+                descriptions.append("Generates documentation to methods/functions/classes that changed in the PR")
+                descriptions.append("Generates unit tests for a specific component, based on the PR code change")
+                descriptions.append("Code suggestions for a specific component that changed in the PR")
+                descriptions.append("Identifies code components that changed in the PR, and enables to interactively generate tests, docs, and code suggestions for each component")
+                descriptions.append("Answering free-text questions about the PR")
+                descriptions.append("Generates custom labels for the PR, based on specific guidelines defined by the user")
+                descriptions.append("Generates feedback and analysis for a failed CI job")
+                descriptions.append("Generates custom suggestions for improving the PR code, derived only from a specific guidelines prompt defined by the user")
+                descriptions.append("Automatically retrieves and presents similar issues")
+
+                commands =[]
+                commands.append("`/describe`")
+                commands.append("`/review`")
+                commands.append("`/improve`")
+                commands.append("`/update_changelog`")
+                commands.append("`/add_docs`")
+                commands.append("`/test`")
+                commands.append("`/improve_component`")
+                commands.append("`/analyze`")
+                commands.append("`/ask`")
+                commands.append("`/generate_labels`")
+                commands.append("`/checks`")
+                commands.append("`/custom_prompt`")
+                commands.append("`/similar_issue`")
+
+                checkbox_list = []
+                checkbox_list.append(" - [ ] Run <!-- /describe -->")
+                checkbox_list.append(" - [ ] Run <!-- /review -->")
+                checkbox_list.append(" - [ ] Run <!-- /improve -->")
+                checkbox_list.append(" - [ ] Run <!-- /update_changelog -->")
+                checkbox_list.append(" - [ ] Run <!-- /add_docs -->")
+                checkbox_list.append(" - [ ] Run <!-- /test -->")
+                checkbox_list.append(" - [ ] Run <!-- /improve_component -->")
+                checkbox_list.append(" - [ ] Run <!-- /analyze -->")
+                checkbox_list.append("[*]")
+                checkbox_list.append("[*]")
+                checkbox_list.append("[*]")
+                checkbox_list.append("[*]")
+                checkbox_list.append("[*]")
+                checkbox_list.append("[*]")
+                checkbox_list.append("[*]")
+                checkbox_list.append("[*]")
+
+                if isinstance(self.git_provider, GithubProvider) and not get_settings().config.get('disable_checkboxes', False):
+                    pr_comment += f"<table><tr align='left'><th align='left'>Tool</th><th align='left'>Description</th><th align='left'>Trigger Interactively :gem:</th></tr>"
+                    for i in range(len(tool_names)):
+                        pr_comment += f"\n<tr><td align='left'>\n\n<strong>{tool_names[i]}</strong></td>\n<td>{descriptions[i]}</td>\n<td>\n\n{checkbox_list[i]}\n</td></tr>"
+                    pr_comment += "</table>\n\n"
+                    pr_comment += f"""\n\n(1) Note that each tool be [triggered automatically](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#github-app-automatic-tools-when-a-new-pr-is-opened) when a new PR is opened, or called manually by [commenting on a PR](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#online-usage)."""
+                    pr_comment += f"""\n\n(2) Tools marked with [*] require additional parameters to be passed. For example, to invoke the `/ask` tool, you need to comment on a PR: `/ask ""`. See the relevant documentation for each tool for more details."""
+                elif isinstance(self.git_provider, BitbucketServerProvider):
+                    # only support basic commands in BBDC
+                    pr_comment = generate_bbdc_table(tool_names[:4], descriptions[:4])
+                else:
+                    pr_comment += f"<table><tr align='left'><th align='left'>Tool</th><th align='left'>Command</th><th align='left'>Description</th></tr>"
+                    for i in range(len(tool_names)):
+                        pr_comment += f"\n<tr><td align='left'>\n\n<strong>{tool_names[i]}</strong></td><td>{commands[i]}</td><td>{descriptions[i]}</td></tr>"
+                    pr_comment += "</table>\n\n"
+                    pr_comment += f"""\n\nNote that each tool be [invoked automatically](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/) when a new PR is opened, or called manually by [commenting on a PR](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#online-usage)."""
+
+                if get_settings().config.publish_output:
+                    self.git_provider.publish_comment(pr_comment)
         except Exception as e:
-            get_logger().error(f"Error while running PRHelpMessage: {e}")
-            return ""
\ No newline at end of file
+            get_logger().exception(f"Error while running PRHelpMessage: {e}")
+            return ""
+
+    async def prepare_relevant_snippets(self, sim_results):
+        # Get relevant snippets
+        relevant_pages = []
+        relevant_snippets = []
+        relevant_snippets_full = []
+        relevant_pages_full = []
+        relevant_snippets_full_header = []
+        th = 0.75
+        for s in sim_results:
+            page = s[0].metadata['source']
+            content = s[0].page_content
+            score = s[1]
+            relevant_snippets_full.append(content)
+            relevant_snippets_full_header.append(extract_header(content))
+            relevant_pages_full.append(page)
+            if not relevant_pages:
+                relevant_pages.append(page)
+                relevant_snippets.append(content)
+            elif score > th:
+                if page not in relevant_pages:
+                    relevant_pages.append(page)
+                    relevant_snippets.append(content)
+        # build the snippets string
+        relevant_snippets_str = ""
+        for i, s in enumerate(relevant_snippets_full):
+            relevant_snippets_str += f"Snippet {i}:\n\n{s}\n\n"
+            relevant_snippets_str += "-------------------\n\n"
+        return relevant_pages_full, relevant_snippets_full_header, relevant_snippets_str
+
+
+def generate_bbdc_table(column_arr_1, column_arr_2):
+    # Generating header row
+    header_row = "| Tool | Description | \n"
+
+    # Generating separator row
+    separator_row = "|--|--|\n"
+
+    # Generating data rows
+    data_rows = ""
+    max_len = max(len(column_arr_1), len(column_arr_2))
+    for i in range(max_len):
+        col1 = column_arr_1[i] if i < len(column_arr_1) else ""
+        col2 = column_arr_2[i] if i < len(column_arr_2) else ""
+        data_rows += f"| {col1} | {col2} |\n"
+
+    # Combine all parts to form the complete table
+    markdown_table = header_row + separator_row + data_rows
+    return markdown_table
diff --git a/requirements.txt b/requirements.txt
index 854e1d67a..05f13ad76 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-aiohttp==3.9.4
+aiohttp==3.9.5
 anthropic[vertex]==0.21.3
 atlassian-python-api==3.41.4
 azure-devops==7.1.0b3
@@ -13,7 +13,7 @@ Jinja2==3.1.2
 litellm==1.43.13
 loguru==0.7.2
 msrest==0.7.1
-openai==1.40.6
+openai==1.46.0
 pytest==7.4.0
 PyGithub==1.59.*
 PyYAML==6.0.1
@@ -28,6 +28,12 @@ gunicorn==22.0.0
 pytest-cov==5.0.0
 pydantic==2.8.2
 html2text==2024.2.26
+# help bot
+langchain==0.3.0
+langchain-openai==0.2.0
+langchain-pinecone==0.2.0
+langchain-chroma==0.1.4
+chromadb==0.5.7
 # Uncomment the following lines to enable the 'similar issue' tool
 # pinecone-client
 # pinecone-datasets @ git+https://github.com/mrT23/pinecone-datasets.git@main