-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(ChatKnowledge):add similarity score and query rewrite (#880)
- Loading branch information
Showing
72 changed files
with
1,452 additions
and
502 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
import duckdb | ||
import pymysql | ||
|
||
""" migrate duckdb to mysql""" | ||
|
||
# Connection settings for the destination MySQL server. Edit the credentials
# before running the script.
mysql_config = dict(
    host="127.0.0.1",
    user="root",
    password="your_password",
    db="dbgpt",
    charset="utf8mb4",
    cursorclass=pymysql.cursors.DictCursor,
)

# DuckDB file path -> name of the table to copy out of that file.
duckdb_files_to_tables = {
    "pilot/message/chat_history.db": "chat_history",
    "pilot/message/connect_config.db": "connect_config",
}

# A single MySQL connection is opened up front and shared by all migrations.
conn_mysql = pymysql.connect(**mysql_config)
|
||
|
||
def migrate_table(duckdb_file_path, source_table, destination_table, conn_mysql):
    """Copy all rows of a DuckDB table into a MySQL table.

    Every column except ``id`` (matched case-insensitively) is copied; the
    ``id`` column is left out of both the SELECT and the INSERT.

    Args:
        duckdb_file_path: Path of the DuckDB database file to read from.
        source_table: Name of the table to read inside the DuckDB file.
        destination_table: Name of the MySQL table to insert into.
        conn_mysql: Open MySQL connection. It is committed after the insert
            but not closed here.

    Note:
        Table and column names are interpolated directly into the SQL text,
        so they must come from a trusted source.
    """
    conn_duckdb = duckdb.connect(duckdb_file_path)
    try:
        cursor = conn_duckdb.cursor()
        # First query only serves to discover the column names.
        cursor.execute(f"SELECT * FROM {source_table}")
        column_names = [
            desc[0] for desc in cursor.description if desc[0].lower() != "id"
        ]
        select_columns = ", ".join(column_names)

        cursor.execute(f"SELECT {select_columns} FROM {source_table}")
        results = cursor.fetchall()

        # The statement is identical for every row, so build it once and
        # hand all rows to the driver in a single executemany() call.
        placeholders = ", ".join(["%s"] * len(column_names))
        insert_query = (
            f"INSERT INTO {destination_table} ({select_columns}) "
            f"VALUES ({placeholders})"
        )
        with conn_mysql.cursor() as cursor_mysql:
            cursor_mysql.executemany(insert_query, results)
        conn_mysql.commit()
    finally:
        # Always release the DuckDB file, even if the copy failed.
        conn_duckdb.close()
|
||
|
||
# Migrate each configured table under its original name; the MySQL
# connection is closed whether or not a migration fails.
try:
    for source_path, table_name in duckdb_files_to_tables.items():
        print(f"Migrating table {table_name} from {source_path}...")
        migrate_table(
            duckdb_file_path=source_path,
            source_table=table_name,
            destination_table=table_name,
            conn_mysql=conn_mysql,
        )
        print(f"Table {table_name} migrated successfully.")
finally:
    conn_mysql.close()

# Only reached when every table was migrated without raising.
print("Migration completed.")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
import duckdb | ||
import sqlite3 | ||
|
||
""" migrate duckdb to sqlite""" | ||
|
||
# Location of the destination SQLite database file.
sqlite_db_path = "pilot/meta_data/dbgpt.db"

# DuckDB file path -> name of the table to copy out of that file.
duckdb_files_to_tables = {
    "pilot/message/chat_history.db": "chat_history",
    "pilot/message/connect_config.db": "connect_config",
}

# A single SQLite connection is opened up front and shared by all migrations.
conn_sqlite = sqlite3.connect(sqlite_db_path)
|
||
|
||
def migrate_table(duckdb_file_path, source_table, destination_table, conn_sqlite):
    """Copy all rows of a DuckDB table into a SQLite table.

    Every column except ``id`` (matched case-insensitively) is copied; the
    ``id`` column is left out of both the SELECT and the INSERT.

    Args:
        duckdb_file_path: Path of the DuckDB database file to read from.
        source_table: Name of the table to read inside the DuckDB file.
        destination_table: Name of the SQLite table to insert into.
        conn_sqlite: Open SQLite connection. It is committed after the insert
            but not closed here.

    Note:
        Table and column names are interpolated directly into the SQL text,
        so they must come from a trusted source.
    """
    conn_duckdb = duckdb.connect(duckdb_file_path)
    try:
        cursor_duckdb = conn_duckdb.cursor()
        # First query only serves to discover the column names.
        cursor_duckdb.execute(f"SELECT * FROM {source_table}")
        column_names = [
            desc[0] for desc in cursor_duckdb.description if desc[0].lower() != "id"
        ]
        select_columns = ", ".join(column_names)

        cursor_duckdb.execute(f"SELECT {select_columns} FROM {source_table}")
        results = cursor_duckdb.fetchall()

        # The statement is identical for every row, so build it once and
        # hand all rows to the driver in a single executemany() call.
        placeholders = ", ".join(["?"] * len(column_names))
        insert_query = (
            f"INSERT INTO {destination_table} ({select_columns}) "
            f"VALUES ({placeholders})"
        )
        cursor_sqlite = conn_sqlite.cursor()
        try:
            cursor_sqlite.executemany(insert_query, results)
            conn_sqlite.commit()
        finally:
            # Close the cursor even when an insert raises.
            cursor_sqlite.close()
    finally:
        # Always release the DuckDB file, even if the copy failed.
        conn_duckdb.close()
|
||
|
||
# Migrate each configured table under its original name; the SQLite
# connection is closed whether or not a migration fails.
try:
    for source_path, table_name in duckdb_files_to_tables.items():
        print(f"Migrating table {table_name} from {source_path} to SQLite...")
        migrate_table(
            duckdb_file_path=source_path,
            source_table=table_name,
            destination_table=table_name,
            conn_sqlite=conn_sqlite,
        )
        print(f"Table {table_name} migrated to SQLite successfully.")
finally:
    conn_sqlite.close()

# Only reached when every table was migrated without raising.
print("Migration to SQLite completed.")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,111 @@ | ||
# RAG Parameter Adjustment | ||
# RAG Parameter Adjustment | ||
Each knowledge space supports argument customization, including the relevant arguments for vector retrieval and the arguments for knowledge question-answering prompts. | ||
|
||
As shown in the figure below, clicking on the "Knowledge" will trigger a pop-up dialog box. Click the "Arguments" button to enter the parameter tuning interface. | ||
![image](https://github.com/eosphoros-ai/DB-GPT/assets/13723926/f02039ea-01d7-493a-acd9-027020d54267) | ||
|
||
|
||
<Tabs | ||
defaultValue="Embedding" | ||
values={[ | ||
{label: 'Embedding Argument', value: 'Embedding'}, | ||
{label: 'Prompt Argument', value: 'Prompt'}, | ||
{label: 'Summary Argument', value: 'Summary'}, | ||
]}> | ||
<TabItem value="Embedding" label="Embedding Argument"> | ||
|
||
![image](https://github.com/eosphoros-ai/DB-GPT/assets/13723926/8a69aba0-3b28-449d-8fd8-ce5bf8dbf7fc) | ||
|
||
:::tip Embedding Arguments | ||
* topk:the top k vectors based on similarity score. | ||
* recall_score:set a similarity threshold score for the retrieval of similar vectors. between 0 and 1. default 0.3. | ||
* recall_type:recall type. Currently only topk by vector similarity is supported. | ||
* model:A model used to create vector representations of text or other data. | ||
* chunk_size:The size of the data chunks used in processing. Default 500. | ||
* chunk_overlap:The amount of overlap between adjacent data chunks. Default 50. | ||
::: | ||
</TabItem> | ||
|
||
<TabItem value="Prompt" label="Prompt Argument"> | ||
|
||
![image](https://github.com/eosphoros-ai/DB-GPT/assets/13723926/00f12903-8d70-4bfb-9f58-26f03a6a4773) | ||
|
||
:::tip Prompt Arguments | ||
* scene:A contextual parameter used to define the setting or environment in which the prompt is being used. | ||
* template:A pre-defined structure or format for the prompt, which can help ensure that the AI system generates responses that are consistent with the desired style or tone. | ||
* max_token:The maximum number of tokens or words allowed in a prompt. | ||
::: | ||
|
||
</TabItem> | ||
|
||
<TabItem value="Summary" label="Summary Argument"> | ||
|
||
![image](https://github.com/eosphoros-ai/DB-GPT/assets/13723926/96782ba2-e9a2-4173-a003-49d44bf874cc) | ||
|
||
:::tip summary arguments | ||
* max_iteration: maximum number of iterative LLM calls used to build a summary, default 5. A larger value gives a better document summary but takes longer. | ||
* concurrency_limit: maximum number of concurrent LLM calls made while summarizing, default 3. | ||
::: | ||
|
||
</TabItem> | ||
|
||
</Tabs> | ||
|
||
# Knowledge Query Rewrite | ||
set ``KNOWLEDGE_SEARCH_REWRITE=True`` in ``.env`` file, and restart the server. | ||
|
||
```shell | ||
# Whether to enable Chat Knowledge Search Rewrite Mode | ||
KNOWLEDGE_SEARCH_REWRITE=True | ||
``` | ||
|
||
# Change Vector Database | ||
import Tabs from '@theme/Tabs'; | ||
import TabItem from '@theme/TabItem'; | ||
|
||
<Tabs | ||
defaultValue="Chroma" | ||
values={[ | ||
{label: 'Chroma', value: 'Chroma'}, | ||
{label: 'Milvus', value: 'Milvus'}, | ||
{label: 'Weaviate', value: 'Weaviate'}, | ||
]}> | ||
<TabItem value="Chroma" label="Chroma"> | ||
|
||
set ``VECTOR_STORE_TYPE`` in ``.env`` file. | ||
|
||
```shell | ||
### Chroma vector db config | ||
VECTOR_STORE_TYPE=Chroma | ||
#CHROMA_PERSIST_PATH=/root/DB-GPT/pilot/data | ||
``` | ||
</TabItem> | ||
|
||
<TabItem value="Milvus" label="Milvus"> | ||
|
||
|
||
set ``VECTOR_STORE_TYPE`` in ``.env`` file | ||
|
||
```shell | ||
### Milvus vector db config | ||
VECTOR_STORE_TYPE=Milvus | ||
MILVUS_URL=127.0.0.1 | ||
MILVUS_PORT=19530 | ||
#MILVUS_USERNAME | ||
#MILVUS_PASSWORD | ||
#MILVUS_SECURE= | ||
``` | ||
</TabItem> | ||
|
||
<TabItem value="Weaviate" label="Weaviate"> | ||
|
||
set ``VECTOR_STORE_TYPE`` in ``.env`` file | ||
|
||
```shell | ||
### Weaviate vector db config | ||
VECTOR_STORE_TYPE=Weaviate | ||
#WEAVIATE_URL=https://kt-region-m8hcy0wc.weaviate.network | ||
``` | ||
|
||
</TabItem> | ||
</Tabs> |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
ChatData & ChatDB | ||
================================== | ||
ChatData generates SQL from natural language and executes it. ChatDB involves conversing with metadata from the | ||
Database, including metadata about databases, tables, and | ||
fields.![db plugins demonstration](https://github.com/eosphoros-ai/DB-GPT/assets/13723926/d8bfeee9-e982-465e-a2b8-1164b673847e) | ||
|
||
### 1.Choose Datasource | ||
|
||
If you are using DB-GPT for the first time, you need to add a data source and set the relevant connection information | ||
for the data source. | ||
|
||
```{tip} | ||
There is some example data in DB-GPT-NEW/DB-GPT/docker/examples; | ||
you can execute the SQL scripts there to generate data. | ||
``` | ||
|
||
#### 1.1 Datasource management | ||
|
||
![db plugins demonstration](https://github.com/eosphoros-ai/DB-GPT/assets/13723926/7678f07e-9eee-40a9-b980-5b3978a0ed52) | ||
|
||
#### 1.2 Connection management | ||
|
||
![db plugins demonstration](https://github.com/eosphoros-ai/DB-GPT/assets/13723926/25b8f5a9-d322-459e-a8b2-bfe8cb42bdd6) | ||
|
||
#### 1.3 Add Datasource | ||
|
||
![db plugins demonstration](https://github.com/eosphoros-ai/DB-GPT/assets/13723926/19ce31a7-4061-4da8-a9cb-efca396cc085) | ||
|
||
```{note} | ||
DB-GPT currently supports the following datasource types: | ||
* Mysql | ||
* Sqlite | ||
* DuckDB | ||
* Clickhouse | ||
* Mssql | ||
``` | ||
|
||
### 2.ChatData | ||
##### Preview Mode | ||
After successfully setting up the data source, you can start conversing with the database. You can ask it to generate | ||
SQL for you or inquire about relevant information on the database's metadata. | ||
![db plugins demonstration](https://github.com/eosphoros-ai/DB-GPT/assets/13723926/8acf6a42-e511-48ff-aabf-3d9037485c1c) | ||
|
||
##### Editor Mode | ||
In Editor Mode, you can edit your sql and execute it. | ||
![db plugins demonstration](https://github.com/eosphoros-ai/DB-GPT/assets/13723926/1a896dc1-7c0e-4354-8629-30357ffd8d7f) | ||
|
||
|
||
### 3.ChatDB | ||
|
||
![db plugins demonstration](https://github.com/eosphoros-ai/DB-GPT/assets/13723926/e04bc1b1-2c58-4b33-af62-97e89098ace7) | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
Installation FAQ | ||
================================== | ||
|
||
|
||
##### Q1: sqlalchemy.exc.OperationalError: (sqlite3.OperationalError) unable to open database file | ||
|
||
make sure you pull latest code or create directory with mkdir pilot/data | ||
|
||
##### Q2: The model keeps getting killed. | ||
|
||
Your GPU does not have enough VRAM. Try upgrading your hardware or switching to a different LLM. | ||
|
||
##### Q3: How to access website on the public network | ||
|
||
You can try to use gradio's [network](https://github.com/gradio-app/gradio/blob/main/gradio/networking.py) to achieve this. | ||
```python | ||
import secrets
import time

from gradio import networking

# Random URL-safe token passed to the tunnel setup call.
token = secrets.token_urlsafe(32)
local_port = 5000
url = networking.setup_tunnel('0.0.0.0', local_port, token)
print(f'Public url: {url}')
# Block for 24 hours; presumably this keeps the tunnel open while the script runs.
time.sleep(60 * 60 * 24)
``` | ||
|
||
Open `url` with your browser to see the website. | ||
|
||
##### Q4: (Windows) execute `pip install -e .` error | ||
|
||
The error log looks like the following: | ||
``` | ||
× python setup.py bdist_wheel did not run successfully. | ||
│ exit code: 1 | ||
╰─> [11 lines of output] | ||
running bdist_wheel | ||
running build | ||
running build_py | ||
creating build | ||
creating build\lib.win-amd64-cpython-310 | ||
creating build\lib.win-amd64-cpython-310\cchardet | ||
copying src\cchardet\version.py -> build\lib.win-amd64-cpython-310\cchardet | ||
copying src\cchardet\__init__.py -> build\lib.win-amd64-cpython-310\cchardet | ||
running build_ext | ||
building 'cchardet._cchardet' extension | ||
error: Microsoft Visual C++ 14.0 or greater is required. Get it with "Microsoft C++ Build Tools": https://visualstudio.microsoft.com/visual-cpp-build-tools/ | ||
[end of output] | ||
``` | ||
|
||
Download and install `Microsoft C++ Build Tools` from [visual-cpp-build-tools](https://visualstudio.microsoft.com/visual-cpp-build-tools/) | ||
|
||
|
||
|
||
##### Q5: `Torch not compiled with CUDA enabled` | ||
|
||
``` | ||
2023-08-19 16:24:30 | ERROR | stderr | raise AssertionError("Torch not compiled with CUDA enabled") | ||
2023-08-19 16:24:30 | ERROR | stderr | AssertionError: Torch not compiled with CUDA enabled | ||
``` | ||
|
||
1. Install [CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit-archive) | ||
2. Reinstall PyTorch [start-locally](https://pytorch.org/get-started/locally/#start-locally) with CUDA support. | ||
|
||
|
||
##### Q6: `How to migrate meta table chat_history and connect_config from duckdb to sqlite` | ||
```commandline | ||
python docker/examples/metadata/duckdb2sqlite.py | ||
``` | ||
|
||
##### Q7: `How to migrate meta table chat_history and connect_config from duckdb to mysql` | ||
```commandline | ||
1. update your mysql username and password in docker/examples/metadata/duckdb2mysql.py | ||
2. python docker/examples/metadata/duckdb2mysql.py | ||
``` |
Oops, something went wrong.