-
Notifications
You must be signed in to change notification settings - Fork 48
/
chat_web.py
182 lines (145 loc) · 6.69 KB
/
chat_web.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
import gradio as gr
from chatbot import DocChatbot
import shutil
import os
import fnmatch
import re
# Custom CSS handed to gr.Blocks(css=...) below; styles elements carrying the
# "importantButton" class.
block_css = """.importantButton {
background: linear-gradient(45deg, #7e0570,#5d1c99, #6e00ff) !important;
border: none !important;
}
.importantButton:hover {
background: linear-gradient(45deg, #ff00e0,#8500ff, #6e00ff) !important;
border: none !important;
}"""
# Markdown rendered at the top of the page via gr.Markdown.
webui_title = """
# Chat with Your Documents
"""
# Bot-side greeting shown as the only chat entry on start-up and after the
# knowledge base is switched.
init_message = """Hello!"""
# Directory the vector-store indexes are listed from, loaded from and saved to.
VS_ROOT_PATH = "./data/vector_store"
# Directory that uploaded source documents are moved into before ingestion.
UPLOAD_ROOT_PATH = "./data/source_documents/"
def get_vs_list(root_path=None):
    """Return the names of the FAISS indexes stored in a vector-store directory.

    An index is recognised by a ``*.faiss`` file; its name is that file's name
    with the ``.faiss`` extension removed.

    Args:
        root_path: Directory to scan. When omitted, ``VS_ROOT_PATH`` is used.

    Returns:
        A sorted list of unique index names, or an empty list when the
        directory does not exist.
    """
    if root_path is None:
        root_path = VS_ROOT_PATH
    if not os.path.isdir(root_path):
        return []
    # splitext removes only the trailing ".faiss", so dotted names such as
    # "manual.v2" are kept whole (split(".")[0] would cut them to "manual").
    index_names = {
        os.path.splitext(entry)[0]
        for entry in os.listdir(root_path)
        if fnmatch.fnmatch(entry, "*.faiss")
    }
    # Sorted so that callers picking the first entry get a stable result.
    return sorted(index_names)
def select_vs_on_change(vs_id):
    """Handle a change of the knowledge-base dropdown.

    Switches the chatbot over to the index named `vs_id`, then returns the
    chat history to display: a single entry with no user text and the
    greeting as the bot's reply.
    """
    switch_kb(vs_id)
    greeting_only_history = [[None, init_message]]
    return greeting_only_history
def switch_kb(index: str) -> None:
    """Point the shared `docChatbot` at the saved index named `index`.

    Calls `load_vector_db_from_local(VS_ROOT_PATH, index)` and then
    `init_chatchain()` on the module-level `docChatbot`.
    """
    docChatbot.load_vector_db_from_local(VS_ROOT_PATH, index)
    # Called after every load; presumably the chain must be rebuilt against the
    # newly loaded store -- confirm against DocChatbot.
    docChatbot.init_chatchain()
def ingest_docs_to_vector_store(vs_name, files, vs_list, select_vs):
    """Create a new knowledge base named `vs_name` from the uploaded `files`.

    The uploads are moved into `UPLOAD_ROOT_PATH`, ingested by the shared
    `docChatbot`, saved under `VS_ROOT_PATH` as `vs_name`, and the chat chain
    is re-initialised.

    Args:
        vs_name: Name typed by the user for the new knowledge base.
        files: Uploaded file objects; each one's `.name` is used as the path
            of the file to move. May be None or empty when nothing was uploaded.
        vs_list: Names of the knowledge bases that already exist.
        select_vs: Current value of the knowledge-base dropdown.

    Returns:
        A 4-tuple in the order of the click handler's outputs
        (files, vs_list, select_vs, vs_name). On rejection the list and
        dropdown are returned unchanged and the reason is shown as the name
        box's placeholder.
    """
    def reject(reason):
        # Nothing was created: echo the current state back and explain why.
        return (
            gr.update(visible=True),
            vs_list,
            select_vs,
            gr.update(value="", placeholder=reason),
        )

    # A blank name would be saved as an index called "".
    if not vs_name or not vs_name.strip():
        return reject("Please input a name for the new knowledge base.")
    if vs_name in vs_list:
        return reject(f"Index name {vs_name} already exists.")
    # `files` is None when nothing was uploaded; an identity test against a
    # fresh [] can never catch that, so rely on truthiness instead.
    if not files:
        return reject("Please upload at least one document.")

    # shutil.move needs the destination directory to exist.
    os.makedirs(UPLOAD_ROOT_PATH, exist_ok=True)
    file_list = []
    for file in files:
        destination = os.path.join(UPLOAD_ROOT_PATH, os.path.basename(file.name))
        shutil.move(file.name, destination)
        file_list.append(destination)

    # Create the new knowledge base and ingest the documents into it.
    docChatbot.init_vector_db_from_documents(file_list)
    docChatbot.save_vector_db_to_local(VS_ROOT_PATH, vs_name)
    docChatbot.init_chatchain()

    updated_vs_list = vs_list + [vs_name]
    # NOTE(review): only the dropdown's choices are refreshed; its selected
    # value is left as-is even though docChatbot now serves the new index.
    return (
        None,
        updated_vs_list,
        gr.update(choices=updated_vs_list),
        gr.update(value="", placeholder=""),
    )
def get_answer(message, chat_history):
    """Answer `message` and append the exchange to `chat_history`.

    Only the most recent exchanges are passed to the chatbot as context, with
    their collapsible reference blocks removed, to reduce token usage. The
    full history is kept for display.

    Args:
        message: The user's question.
        chat_history: Sequence of (user_text, bot_text) pairs shown in the
            chat box; either element of a pair may be None.

    Returns:
        A 2-tuple ("", chat_history): the empty string clears the input box,
        and chat_history now ends with the new (message, answer) pair. The
        answer is followed by one <details> block per source document.
    """
    # Only process the latest 4 messages to reduce tokens. The slice is a
    # separate list so the history returned for display is not truncated.
    MESSAGES_TO_REFERENCE = 4
    recent_turns = chat_history[-MESSAGES_TO_REFERENCE:]

    context = []
    for turn in recent_turns:
        question = "" if turn[0] is None else turn[0]
        answer = "" if turn[1] is None else turn[1]
        # Remove reference details to reduce tokens. The blocks written below
        # span several lines, so DOTALL is needed for "." to cross newlines;
        # the non-greedy match removes each block individually.
        answer = re.sub(r"<details>.*?</details>", "", answer, flags=re.DOTALL)
        context.append((question, answer))

    # TODO: need to handle exception
    result_answer, result_source = docChatbot.get_answer_with_source(message, context)

    references = []
    for number, doc in enumerate(result_source, start=1):
        summary = f"Reference [{number}] "
        # For some PDF documents, PyPDF seems unable to extract the page
        # number, so check the metadata before using either field.
        if "source" in doc.metadata:
            summary += f"{os.path.basename(doc.metadata['source'])} "
        if "page" in doc.metadata:
            summary += f"P{doc.metadata['page']+1}"
        references.append(
            f"<details> <summary>{summary}</summary>\n{doc.page_content}\n</details>"
        )
    output_source = "\n\n" + "".join(references)

    chat_history.append((message, result_answer + output_source))
    return "", chat_history
# Init for web ui
# One DocChatbot instance is created when the module runs and is shared by
# every handler defined in this file.
docChatbot = DocChatbot()
# Index names found at start-up; used as the initial value of the UI's
# knowledge-base list state.
vector_stores_list = get_vs_list()
# Page layout: a chat column on the left and, on the right, one accordion for
# switching knowledge bases and one for uploading documents to create a new one.
with gr.Blocks(css=block_css) as demo:
    vs_list = gr.State(value=vector_stores_list)
    vs_path = gr.State(value="")

    gr.Markdown(webui_title)

    with gr.Tab("Chat"):
        with gr.Row():
            with gr.Column(scale=10):
                chatbot = gr.Chatbot([[None, init_message]],
                                     elem_id="chat-box",
                                     show_label=False).style(height=600)
                query = gr.Textbox(show_label=False,
                                   placeholder="Input your question here and press Enter to get answer.",
                                   ).style(container=False)
                # Pressing Enter sends the question; get_answer returns the
                # cleared input text and the updated chat history.
                query.submit(  # type: ignore
                    get_answer,
                    [query, chatbot],
                    [query, chatbot]
                )

            with gr.Column(scale=5):
                vs_setting_switch = gr.Accordion("Switch Knowledge Base")
                with vs_setting_switch:
                    select_vs = gr.Dropdown(vs_list.value,
                                            interactive=True,
                                            show_label=False,
                                            value=vs_list.value[0] if vs_list.value else None
                                            )
                    if vs_list.value:
                        # Load the first known index at start-up so the chat
                        # is usable immediately. A bare gr.update(...) call
                        # used to follow here; its result was discarded, so it
                        # had no effect and has been removed.
                        switch_kb(vs_list.value[0])

                    select_vs.change(fn=select_vs_on_change,
                                     inputs=[select_vs],
                                     outputs=[chatbot])

                vs_setting_upload = gr.Accordion("Upload Documents to Create Knowledge Base")
                with vs_setting_upload:
                    # vs_add = gr.Button(value="Load")
                    # vs_add.click(fn=add_vs_name,
                    # inputs=[vs_name, vs_list, chatbot],
                    # outputs=[select_vs, vs_list, chatbot])
                    file2vs = gr.Column(visible=True)
                    with file2vs:
                        # load_vs = gr.Button("Load knowledge base")
                        # gr.Markdown("Ingest documents to create a new knowledge base")
                        files = gr.File(file_types=['.docx', '.pdf', '.pptx', '.txt', '.md', '.html'],
                                        file_count="multiple"
                                        )
                        vs_name = gr.Textbox(label="Please input a name for the new knowledge base",
                                             lines=1,
                                             interactive=True)
                        load_file_button = gr.Button("Upload & Create Knowledge Base")
                        # Outputs are listed in the same order as the tuple
                        # returned by ingest_docs_to_vector_store.
                        load_file_button.click(fn=ingest_docs_to_vector_store,
                                               show_progress=True,
                                               inputs=[vs_name, files, vs_list, select_vs],
                                               outputs=[files, vs_list, select_vs, vs_name],
                                               )
# Start the Gradio server on port 8000. The "0.0.0.0" host makes it listen on
# all network interfaces, so it is reachable from other machines.
demo.launch(
    server_name="0.0.0.0",
    server_port=8000
)