Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add additional repo metadata to llm prompts. #69

Merged
merged 1 commit into from
Oct 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,10 @@ notebooks/
.benchmarks/

# Other
templates/
docs/docs
docs/notes
docs/flow.md
examples/markdown/readme-edgecase.md
readmeai/settings/prompts.toml
readmeai/markdown/data/badges.json
templates/
2 changes: 1 addition & 1 deletion docs/features.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
## Key Features
## Features

<br>
<div>
Expand Down
3 changes: 3 additions & 0 deletions docs/overview.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
# README-AI

---

## Why README-AI?

---
439 changes: 257 additions & 182 deletions examples/markdown/readme-python.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "readmeai"
version = "0.4.036"
version = "0.4.037"
description = "Generate beautiful README.md files from the terminal, powered by AI."
authors = ["Eli <0x.eli.64s@gmail.com>"]
license = "MIT"
Expand Down
2 changes: 1 addition & 1 deletion readmeai/config/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,10 @@ class PathsConfig(BaseModel):
class PromptsConfig(BaseModel):
"""Pydantic model for OpenAI prompts."""

code_summary: str
features: str
overview: str
slogan: str
summaries: str


class AppConfig(BaseModel):
Expand Down
17 changes: 12 additions & 5 deletions readmeai/core/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,16 +57,20 @@ def __init__(self, config: settings.AppConfig):
self.rate_limit_semaphore = asyncio.Semaphore(self.rate_limit)

async def code_to_text(
self, ignore: dict, files: Dict[str, str], prompt: str
self,
files: Dict[str, str],
ignore: Dict[str, List[str]],
prompt: str,
tree: str,
) -> Dict[str, str]:
"""Converts code to natural language text using large language models.
Parameters
----------
ignore : dict
Files, directories, or file extensions to ignore.
files : Dict[str, str]
The repository files to convert to text.
ignore : Dict[str, List[str]]
Files, directories, or file extensions to ignore.
prompt : str
The prompt to use for the OpenAI API calls.
Expand All @@ -88,7 +92,7 @@ async def code_to_text(
self.logger.warning(f"Ignoring file: {path}")
continue

prompt_code = prompt.format(str(path), contents)
prompt_code = prompt.format(tree, str(path), contents)
tasks.append(
asyncio.create_task(
self.generate_text(path, prompt_code, self.tokens)
Expand Down Expand Up @@ -160,7 +164,10 @@ async def generate_text(
try:
token_count = get_token_count(prompt, self.encoding)

if token_count > tokens:
if token_count > self.tokens_max:
self.logger.warning(
f"Truncating tokens: {token_count} > {self.tokens_max}"
)
prompt = truncate_tokens(prompt, tokens)

async with self.rate_limit_semaphore:
Expand Down
2 changes: 1 addition & 1 deletion readmeai/core/tokens.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Utilities for handling tokennization."""
"""Utilities for handling language tokens."""

from tiktoken import encoding_for_model, get_encoding

Expand Down
19 changes: 10 additions & 9 deletions readmeai/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
import asyncio
import traceback

import requests

from readmeai.config.settings import (
AppConfig,
AppConfigModel,
Expand All @@ -20,6 +18,7 @@
from readmeai.core import logger, model, preprocess
from readmeai.markdown import headers, tables, tree
from readmeai.services import version_control as vcs
from readmeai.utils import utils

logger = logger.Logger(__name__)

Expand Down Expand Up @@ -80,23 +79,25 @@ async def readme_agent(conf: AppConfig, conf_helper: ConfigHelper) -> None:
parser = preprocess.RepositoryParser(conf, conf_helper)
dependencies, files = parser.get_dependencies(temp_dir)
logger.info(f"Dependencies: {dependencies}")
logger.info(f"Files: {files}")

# Generate codebase file summaries and README.md text via LLMs.
if conf.cli.offline is False:
code_summary = await llm.code_to_text(
conf_helper.ignore_files,
files,
conf.prompts.code_summary,
conf_helper.ignore_files,
conf.prompts.summaries,
tree_str,
)
logger.info(f"Code summaries returned:\n{code_summary[:5]}")
prompts = [
conf.prompts.slogan.format(conf.git.name),
conf.prompts.overview.format(repository, code_summary),
conf.prompts.features.format(repository, tree),
conf.prompts.overview.format(
repository, tree_str, dependencies, code_summary
),
conf.prompts.features.format(
repository, tree_str, dependencies, code_summary
),
]
slogan, overview, features = await llm.chat_to_text(prompts)

else:
conf.md.tables = tables.build_recursive_tables(
repository, temp_dir, placeholder
Expand Down
35 changes: 19 additions & 16 deletions readmeai/settings/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ endpoint = "https://api.openai.com/v1/chat/completions"
encoding = "cl100k_base"
model = "gpt-3.5-turbo"
rate_limit = 3
tokens = 650
tokens_max = 3800
temperature = 0.9
tokens = 750
tokens_max = 4000
temperature = 1.0

# Version Control Systems
[base_urls]
Expand Down Expand Up @@ -38,12 +38,7 @@ output = "readme-ai.md"

# Prompts
[prompts]
code_summary = """Offer a comprehensive summary that encapsulates the core functionalities of the code:
\nPath: {0}\nContents:\n{1}\n Aim for precision and conciseness in your explanation, ensuring a fine balance between detail and brevity.
Limit your response to a maximum of 225 characters (including spaces).
"""
features = """Hello! Analyze the Git codebase {} and create a robust summary of the project's features.
The following information summarizes each file in the repository to help you get started: \n{}\n
features = """Hello! Analyze the repository {0} and follow the instructions below to generate a comprehensive list of features.
Please provide a comprehensive technical analysis of the codebase and its components.
Consider the codebase as a whole and highlight the key characteristics, design patterns, architectural decisions, and any other noteworthy elements.
Generate your response as a Markdown table with the following columns:
Expand All @@ -55,21 +50,29 @@ Generate your response as a Markdown table with the following columns:
| 🔗 | **Dependencies** | Examine the external libraries or other systems that this system relies on here. Limit your response to a maximum of 200 characters.|
| 🧩 | **Modularity** | Discuss the system's organization into smaller, interchangeable components here. Limit your response to a maximum of 200 characters.|
| 🧪 | **Testing** | Evaluate the system's testing strategies and tools here. Limit your response to a maximum of 200 characters. |
| ⚡️ | **Performance** | Analyze how well the system performs, considering speed, efficiency, and resource usage here. Limit your response to a maximum of 200 characters.|
| ⚡️ | **Performance** | Analyze how well the system performs, considering speed, efficiency, and resource usage here. Limit your response to a maximum of 200 characters.|
| 🔐 | **Security** | Assess the measures the system uses to protect data and maintain functionality here. Limit your response to a maximum of 200 characters.|
| 🔀 | **Version Control**| Discuss the system's version control strategies and tools here. Limit your response to a maximum of 200 characters.|
| 🔌 | **Integrations** | Evaluate how the system interacts with other systems and services here. Limit your response to a maximum of 200 characters.|
| 📶 | **Scalability** | Analyze the system's ability to handle growth here. Limit your response to a maximum of 200 characters. |

Thank you for your time and effort!
Repository Details:
\nDirectory Tree: {1}\nDependencies: {2}\nCode Summaries: {3}\n
"""
overview = """Generate a <=100 word summary that describes the capabilities of the repository {0}.
Focus on the project's use-case and value proposition, not its technical details.
Do not refer to the project using the URL provided. Below are more details of the
project so you can get a deep understanding of the codebase and its components.
Repository Details:
\nDirectory Tree: {1}\nDependencies: {2}\nCode Summaries: {3}\n
"""
overview = """Please analyze the codebase located at {} and provide a robust, yet succinct overview of the rpoject.
The following includes a list of the summaries of the files in the repository: \n{}\n
Craft 3-4 sentences that encapsulate the core functionalities of the project, its purpose, and its value proposition.
slogan = "Conceptualize a catchy and memorable slogan for the GitHub project: {0}. Limit your response to 80 characters."
summaries = """Offer a comprehensive summary <= 80 words that encapsulates the core functionalities of the code below.
Aim for precision and conciseness in your explanation, ensuring a fine balance between detail and brevity.
\nDirectory Tree: {0}\nPath: {1}\nCode:\n{2}\n
"""
slogan = "Conceptualize a catchy and memorable slogan for the GitHub project: {}. Limit your response to 80 characters."

# Markdown Template Code
# Markdown Templates
[md]
tables = ""
default = "► INSERT-TEXT"
Expand Down
1 change: 1 addition & 0 deletions readmeai/settings/ignore_files.toml
Original file line number Diff line number Diff line change
Expand Up @@ -139,4 +139,5 @@ files = [
"__init__.py",
"start",
"test_binary",
"mkdocs.yml",
]
20 changes: 12 additions & 8 deletions readmeai/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,21 @@

def should_ignore(conf_helper: ConfigHelper, file_path: Path) -> bool:
"""Filters out files that should be ignored."""
for directory in conf_helper.ignore_files["directories"]:
if directory in file_path.parts:
logger.debug(f"Ignoring directory: {file_path}")
return True
ignore_files = conf_helper.ignore_files

if file_path.name in conf_helper.ignore_files["files"]:
logger.debug(f"Ignoring file: {file_path}")
if any(
directory in file_path.parts
for directory in ignore_files["directories"]
):
logger.debug(f"Ignoring directory: {file_path.name}")
return True

if file_path.suffix[1:] in conf_helper.ignore_files["extensions"]:
logger.debug(f"Ignoring extension: {file_path}")
if file_path.name in ignore_files["files"]:
logger.debug(f"Ignoring file: {file_path.name}")
return True

if file_path.suffix.lstrip(".") in ignore_files["extensions"]:
logger.debug(f"Ignoring extension: {file_path.name}")
return True

return False
Expand Down
Loading