feat: add streamlit app #32

Merged
merged 4 commits on Feb 4, 2024
6 changes: 6 additions & 0 deletions .streamlit/config.toml
@@ -0,0 +1,6 @@
[theme]
backgroundColor = "#202636"
secondaryBackgroundColor = "#1a1f2b"
primaryColor = "#00ff00"
textColor = "#cdd1f4"
font = "sans serif"
165 changes: 75 additions & 90 deletions codeqai/app.py
@@ -1,20 +1,19 @@
import argparse
import os
import subprocess

from dotenv import dotenv_values, load_dotenv
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationSummaryMemory
from rich.console import Console
from rich.markdown import Markdown
from rich.syntax import Syntax
from yaspin import yaspin

from codeqai import codeparser, repo, utils
from codeqai.bootstrap import bootstrap
from codeqai.cache import create_cache_dir, get_cache_path, save_vector_cache
from codeqai.config import create_config, get_config_path, load_config
from codeqai.constants import EmbeddingsModel, LlmHost
from codeqai.embeddings import Embeddings
from codeqai.llm import LLM
from codeqai.vector_store import VectorStore


@@ -66,7 +65,7 @@ def run():
parser = argparse.ArgumentParser()
parser.add_argument(
"action",
choices=["search", "chat", "configure", "sync"],
choices=["app", "search", "chat", "configure", "sync"],
help="Action to perform. 'search' will semantically search the codebase. 'chat' will chat with the codebase.",
)
args = parser.parse_args()
@@ -96,25 +95,26 @@ def run():
):
required_keys.extend(
[
"OPENAI_API_TYPE",
"OPENAI_API_BASE_URL",
"OPENAI_API_BASE",
"OPENAI_API_KEY",
"OPENAI_API_VERSION",
]
)
env_path = get_config_path().replace("config.yaml", ".env")
env_loader(env_path, required_keys)

repo_name = repo.get_git_root(os.getcwd()).split("/")[-1]
repo_name = repo.repo_name()

# init cache
create_cache_dir()

embeddings_model = Embeddings(
model=EmbeddingsModel[config["embeddings"].upper().replace("-", "_")],
deployment=config["embeddings-deployment"]
if "embeddings-deployment" in config
else None,
deployment=(
config["embeddings-deployment"]
if "embeddings-deployment" in config
else None
),
)

# check if faiss.index exists
@@ -129,87 +129,72 @@ def run():
vector_store.index_documents(documents)
save_vector_cache(vector_store.vector_cache, f"{repo_name}.json")
spinner.stop()
else:
vector_store = VectorStore(repo_name, embeddings=embeddings_model.embeddings)
vector_store.load_documents()

if args.action == "sync":
spinner = yaspin(text="🔧 Parsing codebase...", color="green")
files = repo.load_files()
documents = codeparser.parse_code_files(files)
vector_store.sync_documents(documents)
save_vector_cache(vector_store.vector_cache, f"{repo_name}.json")
spinner.stop()
print("⚙️ Vector store synced with current git checkout.")

llm = LLM(
llm_host=LlmHost[config["llm-host"].upper().replace("-", "_")],
chat_model=config["chat-model"],
deployment=config["model-deployment"] if "model-deployment" in config else None,
)
memory = ConversationSummaryMemory(
llm=llm.chat_model, memory_key="chat_history", return_messages=True
)
qa = ConversationalRetrievalChain.from_llm(
llm.chat_model, retriever=vector_store.retriever, memory=memory
)

console = Console()
while True:
choice = None
if args.action == "sync":
break
if args.action == "search":
search_pattern = input("🔎 Enter a search pattern: ")
spinner = yaspin(text="🤖 Processing...", color="green")
spinner.start()
similarity_result = vector_store.similarity_search(search_pattern)
spinner.stop()
for doc in similarity_result:
language = utils.get_programming_language(
utils.get_file_extension(doc.metadata["filename"])
)

start_line, indentation = utils.find_starting_line_and_indent(
doc.metadata["filename"], doc.page_content
)

syntax = Syntax(
indentation + doc.page_content,
language.value,
theme="monokai",
line_numbers=True,
start_line=start_line,
indent_guides=True,
if args.action == "app":
subprocess.run(["streamlit", "run", "codeqai/streamlit.py"])
else:
vector_store, memory, qa = bootstrap(config, repo_name, embeddings_model)
console = Console()
while True:
choice = None
if args.action == "sync":
break
if args.action == "search":
search_pattern = input("🔎 Enter a search pattern: ")
spinner = yaspin(text="🤖 Processing...", color="green")
spinner.start()
similarity_result = vector_store.similarity_search(search_pattern)
spinner.stop()
for doc in similarity_result:
language = utils.get_programming_language(
utils.get_file_extension(doc.metadata["filename"])
)

start_line, indentation = utils.find_starting_line_and_indent(
doc.metadata["filename"], doc.page_content
)

syntax = Syntax(
indentation + doc.page_content,
language.value,
theme="monokai",
line_numbers=True,
start_line=start_line,
indent_guides=True,
)
print(
doc.metadata["filename"] + " -> " + doc.metadata["method_name"]
)
console.print(syntax)
print()

choice = input("[?] (C)ontinue search or (E)xit [C]:").strip().lower()

elif args.action == "chat":
question = input("💬 Ask anything about the codebase: ")
spinner = yaspin(text="🤖 Processing...", color="green")
spinner.start()
result = qa(question)
spinner.stop()
markdown = Markdown(result["answer"])
console.print(markdown)

choice = (
input("[?] (C)ontinue chat, (R)eset chat or (E)xit [C]:")
.strip()
.lower()
)
print(doc.metadata["filename"] + " -> " + doc.metadata["method_name"])
console.print(syntax)
print()

choice = input("[?] (C)ontinue search or (E)xit [C]:").strip().lower()

elif args.action == "chat":
question = input("💬 Ask anything about the codebase: ")
spinner = yaspin(text="🤖 Processing...", color="green")
spinner.start()
result = qa(question)
spinner.stop()
markdown = Markdown(result["answer"])
console.print(markdown)

choice = (
input("[?] (C)ontinue chat, (R)eset chat or (E)xit [C]:")
.strip()
.lower()
)

if choice == "r":
memory.clear()
print("Chat history cleared.")
if choice == "r":
memory.clear()
print("Chat history cleared.")
else:
print("Invalid action.")
exit()

if choice == "" or choice == "c":
continue
elif choice == "e":
break
else:
print("Invalid choice. Please enter 'C', 'E', or 'R'.")
if choice == "" or choice == "c":
continue
elif choice == "e":
break
else:
print("Invalid choice. Please enter 'C', 'E', or 'R'.")
36 changes: 36 additions & 0 deletions codeqai/bootstrap.py
@@ -0,0 +1,36 @@
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationSummaryMemory

from codeqai.constants import EmbeddingsModel, LlmHost
from codeqai.embeddings import Embeddings
from codeqai.llm import LLM
from codeqai.vector_store import VectorStore


def bootstrap(config, repo_name, embeddings_model=None):
if embeddings_model is None:
embeddings_model = Embeddings(
model=EmbeddingsModel[config["embeddings"].upper().replace("-", "_")],
deployment=(
config["embeddings-deployment"]
if "embeddings-deployment" in config
else None
),
)

vector_store = VectorStore(repo_name, embeddings=embeddings_model.embeddings)
vector_store.load_documents()

llm = LLM(
llm_host=LlmHost[config["llm-host"].upper().replace("-", "_")],
chat_model=config["chat-model"],
deployment=config["model-deployment"] if "model-deployment" in config else None,
)
memory = ConversationSummaryMemory(
llm=llm.chat_model, memory_key="chat_history", return_messages=True
)
qa = ConversationalRetrievalChain.from_llm(
llm.chat_model, retriever=vector_store.retriever, memory=memory
)

return vector_store, memory, qa
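`bootstrap()` centralizes the vector store, conversation memory, and retrieval-chain setup that `app.py` previously did inline, and `embeddings_model` is optional, so a caller without one can rely on the config alone. A minimal sketch of calling it directly (assumes `config.yaml`, the `.env` file, and the FAISS cache already exist; the question is illustrative):

```python
from codeqai import repo
from codeqai.bootstrap import bootstrap
from codeqai.config import load_config

# embeddings_model is omitted, so bootstrap derives it from the config.
vector_store, memory, qa = bootstrap(load_config(), repo.repo_name())
print(qa("Where is the FAISS index loaded?")["answer"])
```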
28 changes: 17 additions & 11 deletions codeqai/llm.py
@@ -5,7 +5,7 @@
import inquirer
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chat_models import ChatOpenAI, AzureChatOpenAI
from langchain.chat_models import AzureChatOpenAI, ChatOpenAI
from langchain.llms import LlamaCpp, Ollama

from codeqai import utils
@@ -19,13 +19,19 @@ def __init__(self, llm_host: LlmHost, chat_model: str, deployment=None):
temperature=0.9, max_tokens=2048, model=chat_model
)
elif llm_host == LlmHost.AZURE_OPENAI and deployment:
self.chat_model = AzureChatOpenAI(
openai_api_base=os.getenv("OPENAI_API_BASE"),
temperature=0.9,
max_tokens=2048,
deployment_name=deployment,
model=chat_model,
)
openai_api_base = os.getenv("OPENAI_API_BASE")
if openai_api_base:
self.chat_model = AzureChatOpenAI(
openai_api_base=openai_api_base,
temperature=0.9,
max_tokens=2048,
deployment_name=deployment,
model=chat_model,
)
else:
raise ValueError(
"Azure OpenAI requires environment variable OPENAI_API_BASE to be set."
)
elif llm_host == LlmHost.LLAMACPP:
self.install_llama_cpp()
self.chat_model = LlamaCpp(
@@ -81,9 +87,9 @@ def install_llama(backend):
elif backend == "Metal":
env_vars["CMAKE_ARGS"] = "-DLLAMA_METAL=on"
else: # Default to OpenBLAS
env_vars[
"CMAKE_ARGS"
] = "-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS"
env_vars["CMAKE_ARGS"] = (
"-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS"
)

try:
subprocess.run(
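The Azure OpenAI branch now fails fast with a `ValueError` instead of handing `openai_api_base=None` to `AzureChatOpenAI`. A hedged sketch of the expected setup (the resource URL, key, API version, model, and deployment names below are placeholders, not values from this PR):

```python
import os

from codeqai.constants import LlmHost
from codeqai.llm import LLM

# Placeholders -- substitute your own Azure OpenAI values.
os.environ["OPENAI_API_TYPE"] = "azure"
os.environ["OPENAI_API_BASE"] = "https://<your-resource>.openai.azure.com/"
os.environ["OPENAI_API_KEY"] = "<your-key>"
os.environ["OPENAI_API_VERSION"] = "2023-05-15"

llm = LLM(llm_host=LlmHost.AZURE_OPENAI, chat_model="gpt-4", deployment="<your-deployment>")
# Without OPENAI_API_BASE set, the constructor now raises:
# ValueError: Azure OpenAI requires environment variable OPENAI_API_BASE to be set.
```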
4 changes: 4 additions & 0 deletions codeqai/repo.py
@@ -4,6 +4,10 @@
from git.repo import Repo


def repo_name():
return get_git_root(os.getcwd()).split("/")[-1]


def get_git_root(path):
git_repo = Repo(path, search_parent_directories=True)
git_root = git_repo.git.rev_parse("--show-toplevel")
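`repo_name()` is a small convenience wrapper around the existing `get_git_root()` so that callers like `app.py` no longer re-derive the name from `os.getcwd()` themselves. For illustration (the path is hypothetical):

```python
from codeqai import repo

# If the working directory sits inside /home/dev/codeqai (hypothetical path),
# get_git_root() resolves to that directory and repo_name() returns "codeqai".
print(repo.repo_name())
```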