Skip to content

Commit

Permalink
Merge pull request #188 from amosproj/refactoring/156_final_code_clean
Browse files (browse the repository at this point in the history)
Refactoring/156 final code clean
  • Loading branch information
jtshark authored Jul 18, 2023
2 parents 671f8f1 + e1834f8 commit 95c875c
Show file tree
Hide file tree
Showing 20 changed files with 52 additions and 36 deletions.
6 changes: 3 additions & 3 deletions QAChat/Common/bucket_managing.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import os
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Felix Nützel
# SPDX-FileCopyrightText: 2023 Jesse Palarus

from dotenv import load_dotenv
from google.cloud import storage
from weaviate.embedded import EmbeddedOptions
import weaviate
from QAChat.Common.init_db import clear_db
from get_tokens import get_tokens_path
import shutil

load_dotenv(get_tokens_path())
bucket_name = "qabot_db_data"
Expand Down
1 change: 1 addition & 0 deletions QAChat/Common/db_info.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Felix Nützel

from weaviate.embedded import EmbeddedOptions
Expand Down
15 changes: 11 additions & 4 deletions QAChat/Common/deepL_translator.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Emanuel Erben
# SPDX-FileCopyrightText: 2023 Felix Nützel
# SPDX-FileCopyrightText: 2023 Jesse Palarus

import os

Expand Down Expand Up @@ -39,10 +40,16 @@ def __init__(self):
if "language_detector" not in self.muulti_lang_nlp.pipe_names:
self.muulti_lang_nlp.add_pipe("language_detector", last=True)

def translate_to(self, text, target_lang):
doc = self.muulti_lang_nlp(text)
if doc._.language["language"] == "en" and doc._.language["score"] > 0.8:
return Result(text, "EN_US")
def translate_to(self, text, target_lang, use_spacy_to_detect_lang_if_needed=True):
if use_spacy_to_detect_lang_if_needed:
doc = self.muulti_lang_nlp(text)
if (
doc._.language["language"] == "en"
and doc._.language["score"] > 0.8
and target_lang == "EN-US"
):
return Result(text, "EN_US")

result = self.translator.translate_text(
text, target_lang=target_lang, ignore_tags="name"
)
Expand Down
1 change: 1 addition & 0 deletions QAChat/Common/init_db.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Felix Nützel
def init_db(weaviate_client):
global client
Expand Down
2 changes: 1 addition & 1 deletion QAChat/Data_Processing/data_preprocessor.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Jesse Tim Palarus
# SPDX-FileCopyrightText: 2023 Jesse Palarus
# SPDX-FileCopyrightText: 2023 Amela Pucic

from abc import ABC, abstractmethod
Expand Down
2 changes: 1 addition & 1 deletion QAChat/Data_Processing/document_embedder.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Jesse Tim Palarus
# SPDX-FileCopyrightText: 2023 Jesse Palarus
# SPDX-FileCopyrightText: 2023 Amela Pucic
# SPDX-FileCopyrightText: 2023 Felix Nützel
# SPDX-FileCopyrightText: 2023 Emanuel Erben
Expand Down
2 changes: 1 addition & 1 deletion QAChat/Data_Processing/dummy_preprocessor.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Jesse Tim Palarus
# SPDX-FileCopyrightText: 2023 Jesse Palarus
# SPDX-FileCopyrightText: 2023 Amela Pucic

from datetime import datetime
Expand Down
3 changes: 0 additions & 3 deletions QAChat/Data_Processing/google_doc_preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,9 @@

from __future__ import print_function
import io
from pdf_reader import PDFReader
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from googleapiclient.http import MediaIoBaseDownload
from google_auth_oauthlib.flow import InstalledAppFlow
from QAChat.Common.init_db import init_db
from QAChat.Data_Processing.pdf_reader import PDFReader
from google.oauth2 import service_account
import os
Expand Down
2 changes: 1 addition & 1 deletion QAChat/Data_Processing/main.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Jesse Tim Palarus
# SPDX-FileCopyrightText: 2023 Jesse Palarus
# SPDX-FileCopyrightText: 2023 Amela Pucic

from document_embedder import DocumentEmbedder, DataSource
Expand Down
1 change: 0 additions & 1 deletion QAChat/Data_Processing/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ langchain
nltk
deepl
python-dateutil
supabase
pytesseract
pdf2image
pdfminer.six
Expand Down
10 changes: 6 additions & 4 deletions QAChat/QA_Bot/qa_bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@
# SPDX-FileCopyrightText: 2023 Jesse Palarus
# SPDX-FileCopyrightText: 2023 Amela Pucic

import os
from time import time
from typing import List

from huggingface_hub import hf_hub_download
from langchain import LlamaCpp, PromptTemplate
Expand Down Expand Up @@ -152,7 +150,9 @@ def __sim_search(self, question: str) -> List[str]:
]

def translate_text(self, question, language="EN-US"):
return self.translator.translate_to(question, language)
return self.translator.translate_to(
question, language, use_spacy_to_detect_lang_if_needed=False
)

def answer_question(self, question: str, handler: StreamLLMCallbackHandler | None):
"""
Expand Down Expand Up @@ -183,7 +183,9 @@ def answer_question(self, question: str, handler: StreamLLMCallbackHandler | Non
translated_question, context, handler
)
print(f"Answer: {answer}")
answer = self.translate_text(answer, translation.detected_source_lang).text
if translation.detected_source_lang != "EN-US":
answer = self.translate_text(answer, translation.detected_source_lang).text

print(f"Translated answer: {answer}")
return {
"answer": answer,
Expand Down
1 change: 0 additions & 1 deletion QAChat/QA_Bot/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
flask
langchain
supabase
InstructorEmbedding
sentence-transformers
deepl
Expand Down
1 change: 0 additions & 1 deletion QAChat/QA_Bot/setup_server.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Jesse Palarus

import json
import threading

from flask import Flask, request, stream_with_context, Response
Expand Down
4 changes: 3 additions & 1 deletion QAChat/QA_Bot/stream_LLM_callback_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,7 @@ def on_llm_new_token(self, token: str, **kwargs):

def send_response(self, text):
if self.lang != "EN-US":
text = self.translator.translate_to(text, self.lang).text
text = self.translator.translate_to(
text, self.lang, use_spacy_to_detect_lang_if_needed=False
).text
return json.dumps({"text": text}) + "\n"
10 changes: 5 additions & 5 deletions QAChat/Slack_Bot/qa_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,7 @@
# SPDX-FileCopyrightText: 2023 Felix Nützel
# SPDX-FileCopyrightText: 2023 Jesse Palarus
import os
import queue
import re
import threading
import time

from threading import Thread

Expand Down Expand Up @@ -44,8 +41,11 @@ def receive_question(self, question, say, channel_id):
),
)

for answer in self.api_interface.request(question):
asynchronous_processor.add(answer)
try:
for answer in self.api_interface.request(question):
asynchronous_processor.add(answer)
except Exception as e:
asynchronous_processor.add(f"Ohh there is was an error...\n{e}")
asynchronous_processor.end()

def process_question(self, body, say):
Expand Down
4 changes: 4 additions & 0 deletions QAChat/Translation_Testing/translation_tests.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Emanuel Erben
# SPDX-FileCopyrightText: 2023 Felix Nützel

import csv
from datetime import datetime

Expand Down
11 changes: 7 additions & 4 deletions Testing/Performace/qa_bot_performance.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Jesse Palarus

import random
import time
from unittest.mock import Mock, create_autospec
Expand Down Expand Up @@ -86,7 +89,7 @@ def test_embedding_speed():
print(f"Embedding test time: {elapsed_time} seconds.")


def test_supabase_speed():
def test_database_speed():
qa_bot = QABot(
translator=mock,
model=mock,
Expand All @@ -99,7 +102,7 @@ def test_supabase_speed():
qa_bot.database.similarity_search_by_vector(random_embedding, 3)
end_time = time.time()
elapsed_time = end_time - start_time
print(f"Supabase reading time: {elapsed_time} seconds.")
print(f"LLM reading time: {elapsed_time} seconds.")


def time_this(original_method):
Expand Down Expand Up @@ -141,7 +144,7 @@ def test_overall_performance():
sum_of_chars = 0

for question in questions_en:
output = qa_bot.answer_question(question)
output = qa_bot.answer_question(question, None)
sum_of_chars += len(output["answer"])
end_time = time.time()
elapsed_time = end_time - start_time
Expand All @@ -155,5 +158,5 @@ def test_overall_performance():
test_llm_speed()
test_deepl_speed()
test_embedding_speed()
test_supabase_speed()
test_database_speed()
test_overall_performance()
5 changes: 0 additions & 5 deletions Testing/confluence_integration_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,13 @@

from QAChat.Data_Processing.pdf_reader import PDFReader


load_dotenv("../QAChat/tokens.env")

# Get Confluence API credentials from environment variables
CONFLUENCE_ADDRESS = os.getenv("CONFLUENCE_ADDRESS")
CONFLUENCE_USERNAME = os.getenv("CONFLUENCE_USERNAME")
CONFLUENCE_TOKEN = os.getenv("CONFLUENCE_TOKEN")

# Get Supabase API credentials from environment variables
SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_SERVICE_KEY = os.getenv("SUPABASE_SERVICE_KEY")


class ConfluenceIntegrationTest(unittest.TestCase):
def setUp(self):
Expand Down
4 changes: 4 additions & 0 deletions Testing/llm_test.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Amela Pucic
# SPDX-FileCopyrightText: 2023 Jesse Palarus

from unittest.mock import Mock

from QAChat.QA_Bot.qa_bot import QABot
Expand Down
3 changes: 3 additions & 0 deletions get_tokens.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Felix Nützel

import os


Expand Down

0 comments on commit 95c875c

Please sign in to comment.