feat(ChatKnowledge):add similarity score and query rewrite (#880)
Aries-ckt authored Dec 4, 2023
1 parent 13fb9d0 commit 54d5b0b
Showing 72 changed files with 1,452 additions and 502 deletions.
7 changes: 4 additions & 3 deletions .env.template
@@ -78,6 +78,8 @@ KNOWLEDGE_SEARCH_TOP_SIZE=5
#KNOWLEDGE_CHUNK_OVERLAP=50
# Control whether to display the source document of knowledge on the front end.
KNOWLEDGE_CHAT_SHOW_RELATIONS=False
# Whether to enable Chat Knowledge Search Rewrite Mode
KNOWLEDGE_SEARCH_REWRITE=False
## EMBEDDING_TOKENIZER - Tokenizer to use for chunking large inputs
## EMBEDDING_TOKEN_LIMIT - Chunk size limit for large inputs
# EMBEDDING_MODEL=all-MiniLM-L6-v2
@@ -92,16 +94,15 @@ KNOWLEDGE_CHAT_SHOW_RELATIONS=False


#*******************************************************************#
#** DATABASE SETTINGS **#
#** DB-GPT METADATA DATABASE SETTINGS **#
#*******************************************************************#
### SQLite database (Current default database)
LOCAL_DB_PATH=data/default_sqlite.db
LOCAL_DB_TYPE=sqlite

### MYSQL database
# LOCAL_DB_TYPE=mysql
# LOCAL_DB_USER=root
# LOCAL_DB_PASSWORD=aa12345678
# LOCAL_DB_PASSWORD={your_password}
# LOCAL_DB_HOST=127.0.0.1
# LOCAL_DB_PORT=3306
# LOCAL_DB_NAME=dbgpt
54 changes: 54 additions & 0 deletions docker/examples/metadata/duckdb2mysql.py
@@ -0,0 +1,54 @@
import duckdb
import pymysql

""" migrate duckdb to mysql"""

mysql_config = {
"host": "127.0.0.1",
"user": "root",
"password": "your_password",
"db": "dbgpt",
"charset": "utf8mb4",
"cursorclass": pymysql.cursors.DictCursor,
}

duckdb_files_to_tables = {
"pilot/message/chat_history.db": "chat_history",
"pilot/message/connect_config.db": "connect_config",
}

conn_mysql = pymysql.connect(**mysql_config)


def migrate_table(duckdb_file_path, source_table, destination_table, conn_mysql):
conn_duckdb = duckdb.connect(duckdb_file_path)
try:
cursor = conn_duckdb.cursor()
cursor.execute(f"SELECT * FROM {source_table}")
column_names = [
desc[0] for desc in cursor.description if desc[0].lower() != "id"
]
select_columns = ", ".join(column_names)

cursor.execute(f"SELECT {select_columns} FROM {source_table}")
results = cursor.fetchall()

with conn_mysql.cursor() as cursor_mysql:
for row in results:
placeholders = ", ".join(["%s"] * len(row))
insert_query = f"INSERT INTO {destination_table} ({', '.join(column_names)}) VALUES ({placeholders})"
cursor_mysql.execute(insert_query, row)
conn_mysql.commit()
finally:
conn_duckdb.close()


try:
for duckdb_file, table in duckdb_files_to_tables.items():
print(f"Migrating table {table} from {duckdb_file}...")
migrate_table(duckdb_file, table, table, conn_mysql)
print(f"Table {table} migrated successfully.")
finally:
conn_mysql.close()

print("Migration completed.")
48 changes: 48 additions & 0 deletions docker/examples/metadata/duckdb2sqlite.py
@@ -0,0 +1,48 @@
import duckdb
import sqlite3

""" migrate duckdb to sqlite"""

duckdb_files_to_tables = {
"pilot/message/chat_history.db": "chat_history",
"pilot/message/connect_config.db": "connect_config",
}

sqlite_db_path = "pilot/meta_data/dbgpt.db"

conn_sqlite = sqlite3.connect(sqlite_db_path)


def migrate_table(duckdb_file_path, source_table, destination_table, conn_sqlite):
conn_duckdb = duckdb.connect(duckdb_file_path)
try:
cursor_duckdb = conn_duckdb.cursor()
cursor_duckdb.execute(f"SELECT * FROM {source_table}")
column_names = [
desc[0] for desc in cursor_duckdb.description if desc[0].lower() != "id"
]
select_columns = ", ".join(column_names)

cursor_duckdb.execute(f"SELECT {select_columns} FROM {source_table}")
results = cursor_duckdb.fetchall()

cursor_sqlite = conn_sqlite.cursor()
for row in results:
placeholders = ", ".join(["?"] * len(row))
insert_query = f"INSERT INTO {destination_table} ({', '.join(column_names)}) VALUES ({placeholders})"
cursor_sqlite.execute(insert_query, row)
conn_sqlite.commit()
cursor_sqlite.close()
finally:
conn_duckdb.close()


try:
for duckdb_file, table in duckdb_files_to_tables.items():
print(f"Migrating table {table} from {duckdb_file} to SQLite...")
migrate_table(duckdb_file, table, table, conn_sqlite)
print(f"Table {table} migrated to SQLite successfully.")
finally:
conn_sqlite.close()

print("Migration to SQLite completed.")
112 changes: 111 additions & 1 deletion docs/docs/application/advanced_tutorial/rag.md
@@ -1 +1,111 @@
# RAG Parameter Adjustment
Each knowledge space supports argument customization, including arguments for vector retrieval and arguments for the knowledge question-answering prompt.

As shown in the figure below, clicking on a "Knowledge" space opens a pop-up dialog. Click the "Arguments" button to enter the parameter tuning interface.
![image](https://github.com/eosphoros-ai/DB-GPT/assets/13723926/f02039ea-01d7-493a-acd9-027020d54267)


<Tabs
defaultValue="Embedding"
values={[
{label: 'Embedding Argument', value: 'Embedding'},
{label: 'Prompt Argument', value: 'Prompt'},
{label: 'Summary Argument', value: 'Summary'},
]}>
<TabItem value="Embedding" label="Embedding Argument">

![image](https://github.com/eosphoros-ai/DB-GPT/assets/13723926/8a69aba0-3b28-449d-8fd8-ce5bf8dbf7fc)

:::tip Embedding Arguments
* topk: return the top k vectors ranked by similarity score.
* recall_score: similarity threshold for retrieving vectors, between 0 and 1. Default 0.3.
* recall_type: recall type. Currently only top-k retrieval by vector similarity is supported.
* model: the model used to create vector representations (embeddings) of text or other data.
* chunk_size: the size of the data chunks used in processing. Default 500.
* chunk_overlap: the amount of overlap between adjacent data chunks. Default 50.
:::
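
To make the interaction between `topk` and `recall_score` concrete, here is a minimal, self-contained sketch of threshold-then-top-k filtering. It only illustrates the idea and is not DB-GPT's retrieval code.

```python
from typing import List, Tuple

def filter_chunks(
    scored_chunks: List[Tuple[str, float]],  # (chunk_text, similarity in [0, 1])
    topk: int = 5,
    recall_score: float = 0.3,
) -> List[Tuple[str, float]]:
    """Drop chunks below the similarity threshold, then keep the top k."""
    kept = [c for c in scored_chunks if c[1] >= recall_score]
    kept.sort(key=lambda c: c[1], reverse=True)
    return kept[:topk]

# Example: the threshold removes "d", then topk=2 keeps the two best chunks.
chunks = [("a", 0.82), ("b", 0.45), ("c", 0.31), ("d", 0.12)]
print(filter_chunks(chunks, topk=2, recall_score=0.3))  # [('a', 0.82), ('b', 0.45)]
```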
</TabItem>

<TabItem value="Prompt" label="Prompt Argument">

![image](https://github.com/eosphoros-ai/DB-GPT/assets/13723926/00f12903-8d70-4bfb-9f58-26f03a6a4773)

:::tip Prompt Arguments
* scene: a contextual parameter used to define the setting or environment in which the prompt is being used.
* template: a pre-defined structure or format for the prompt, which helps ensure that the AI system generates responses consistent with the desired style or tone.
* max_token: the maximum number of tokens or words allowed in a prompt.
:::
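
As a rough illustration of how these arguments fit together, the sketch below fills a template and truncates the result to `max_token`. The template text, variable names, and the `tokenizer` argument are examples for illustration only, not DB-GPT's defaults.

```python
scene = "chat_knowledge"  # example scene name, for illustration only

template = (
    "Based on the following known content, answer the user's question concisely.\n"
    "Known content:\n{context}\n"
    "Question:\n{question}\n"
)

max_token = 2048  # upper bound on prompt length, in tokens

def build_prompt(context: str, question: str, tokenizer) -> str:
    """Fill the template, then truncate the prompt if it exceeds max_token."""
    prompt = template.format(context=context, question=question)
    tokens = tokenizer.encode(prompt)
    if len(tokens) > max_token:
        prompt = tokenizer.decode(tokens[:max_token])
    return prompt
```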

</TabItem>

<TabItem value="Summary" label="Summary Argument">

![image](https://github.com/eosphoros-ai/DB-GPT/assets/13723926/96782ba2-e9a2-4173-a003-49d44bf874cc)

:::tip Summary Arguments
* max_iteration: maximum number of LLM calls used to iteratively summarize a document. Default 5. A larger value generally produces a better summary but takes longer.
* concurrency_limit: maximum number of concurrent LLM calls during summarization. Default 3.
:::
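
A rough sketch of how `max_iteration` and `concurrency_limit` could bound an iterative summary. Here `summarize` stands in for an async LLM call and is a hypothetical callable, not a DB-GPT API.

```python
import asyncio
from typing import Awaitable, Callable, List

async def iterative_summary(
    chunks: List[str],
    summarize: Callable[[str], Awaitable[str]],  # hypothetical async LLM call
    max_iteration: int = 5,
    concurrency_limit: int = 3,
) -> str:
    """Summarize chunks round by round, limiting concurrent LLM calls."""
    semaphore = asyncio.Semaphore(concurrency_limit)

    async def bounded(text: str) -> str:
        async with semaphore:  # at most `concurrency_limit` calls in flight
            return await summarize(text)

    texts = chunks
    for _ in range(max_iteration):  # each round condenses the partial summaries
        if len(texts) == 1:
            break
        summaries = await asyncio.gather(*(bounded(t) for t in texts))
        # merge neighbouring partial summaries for the next round
        texts = ["\n".join(summaries[i : i + 2]) for i in range(0, len(summaries), 2)]
    return texts[0]
```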

</TabItem>

</Tabs>

# Knowledge Query Rewrite
Set ``KNOWLEDGE_SEARCH_REWRITE=True`` in the ``.env`` file and restart the server.

```shell
# Whether to enable Chat Knowledge Search Rewrite Mode
KNOWLEDGE_SEARCH_REWRITE=True
```
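
Conceptually, query rewrite asks the LLM to reformulate the user question into several alternative search queries, retrieves chunks for each, and merges the results before answering. The sketch below only illustrates that flow; `llm` and `retriever` are hypothetical callables, not DB-GPT APIs.

```python
def rewrite_and_search(question, llm, retriever, num_rewrites=3, topk=5):
    """Rewrite the question, retrieve per query, and merge the results."""
    prompt = (
        f"Rewrite the following question into {num_rewrites} different search queries, "
        f"one per line:\n{question}"
    )
    rewrites = [q.strip() for q in llm(prompt).splitlines() if q.strip()]

    seen, merged = set(), []
    for query in [question, *rewrites]:
        for chunk, score in retriever(query, topk=topk):
            if chunk not in seen:  # de-duplicate chunks across rewrites
                seen.add(chunk)
                merged.append((chunk, score))
    # keep the highest-scoring chunks overall
    return sorted(merged, key=lambda x: x[1], reverse=True)[:topk]
```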

# Change Vector Database
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

<Tabs
defaultValue="Chroma"
values={[
{label: 'Chroma', value: 'Chroma'},
{label: 'Milvus', value: 'Milvus'},
{label: 'Weaviate', value: 'Weaviate'},
]}>
<TabItem value="Chroma" label="Chroma">

Set ``VECTOR_STORE_TYPE`` in the ``.env`` file.

```shell
### Chroma vector db config
VECTOR_STORE_TYPE=Chroma
#CHROMA_PERSIST_PATH=/root/DB-GPT/pilot/data
```
</TabItem>

<TabItem value="Milvus" label="Milvus">


Set ``VECTOR_STORE_TYPE`` in the ``.env`` file.

```shell
### Milvus vector db config
VECTOR_STORE_TYPE=Milvus
MILVUS_URL=127.0.0.1
MILVUS_PORT=19530
#MILVUS_USERNAME
#MILVUS_PASSWORD
#MILVUS_SECURE=
```
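
If you want to double-check that the Milvus instance is reachable before starting DB-GPT, a quick check with the `pymilvus` client (installed separately via `pip install pymilvus`) might look like this:

```python
from pymilvus import connections, utility

# Connect using the same host/port configured in .env
connections.connect(alias="default", host="127.0.0.1", port="19530")
print(utility.get_server_version())  # prints the server version if reachable
```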
</TabItem>

<TabItem value="Weaviate" label="Weaviate">

Set ``VECTOR_STORE_TYPE`` in the ``.env`` file.

```shell
### Weaviate vector db config
VECTOR_STORE_TYPE=Weaviate
#WEAVIATE_URL=https://kt-region-m8hcy0wc.weaviate.network
```

</TabItem>
</Tabs>
2 changes: 0 additions & 2 deletions docs/docs/faq.md

This file was deleted.

55 changes: 55 additions & 0 deletions docs/docs/faq/chatdata.md
@@ -0,0 +1,55 @@
ChatData & ChatDB
==================================
ChatData generates SQL from natural language and executes it. ChatDB involves conversing with metadata from the
database, including metadata about databases, tables, and fields.

![db plugins demonstration](https://github.com/eosphoros-ai/DB-GPT/assets/13723926/d8bfeee9-e982-465e-a2b8-1164b673847e)

### 1. Choose Datasource

If you are using DB-GPT for the first time, you need to add a data source and set the relevant connection information
for the data source.

```{tip}
There is some example data in DB-GPT-NEW/DB-GPT/docker/examples;
you can execute the SQL scripts there to generate data.
```

#### 1.1 Datasource management

![db plugins demonstration](https://github.com/eosphoros-ai/DB-GPT/assets/13723926/7678f07e-9eee-40a9-b980-5b3978a0ed52)

#### 1.2 Connection management

![db plugins demonstration](https://github.com/eosphoros-ai/DB-GPT/assets/13723926/25b8f5a9-d322-459e-a8b2-bfe8cb42bdd6)

#### 1.3 Add Datasource

![db plugins demonstration](https://github.com/eosphoros-ai/DB-GPT/assets/13723926/19ce31a7-4061-4da8-a9cb-efca396cc085)

```{note}
DB-GPT currently supports the following datasource types:
* MySQL
* SQLite
* DuckDB
* ClickHouse
* MSSQL
```

### 2. ChatData
##### Preview Mode
After successfully setting up the data source, you can start conversing with the database. You can ask it to generate
SQL for you or inquire about the database's metadata.
![db plugins demonstration](https://github.com/eosphoros-ai/DB-GPT/assets/13723926/8acf6a42-e511-48ff-aabf-3d9037485c1c)

##### Editor Mode
In Editor Mode, you can edit your SQL and execute it.
![db plugins demonstration](https://github.com/eosphoros-ai/DB-GPT/assets/13723926/1a896dc1-7c0e-4354-8629-30357ffd8d7f)


### 3. ChatDB

![db plugins demonstration](https://github.com/eosphoros-ai/DB-GPT/assets/13723926/e04bc1b1-2c58-4b33-af62-97e89098ace7)


73 changes: 73 additions & 0 deletions docs/docs/faq/install.md
@@ -0,0 +1,73 @@
Installation FAQ
==================================


##### Q1: sqlalchemy.exc.OperationalError: (sqlite3.OperationalError) unable to open database file

Make sure you have pulled the latest code, or create the directory with `mkdir pilot/data`.

##### Q2: The model keeps getting killed.

Your GPU VRAM is not large enough. Try upgrading your hardware or switching to a smaller LLM.

##### Q3: How to access the website from the public network

You can try to use gradio's [networking](https://github.com/gradio-app/gradio/blob/main/gradio/networking.py) module to set up a tunnel.
```python
import secrets
import time

from gradio import networking

token = secrets.token_urlsafe(32)
local_port = 5000
url = networking.setup_tunnel('0.0.0.0', local_port, token)
print(f'Public url: {url}')
time.sleep(60 * 60 * 24)  # keep the tunnel open for 24 hours
```

Open `url` with your browser to see the website.

##### Q4: (Windows) execute `pip install -e .` error

The error log looks like the following:
```
× python setup.py bdist_wheel did not run successfully.
│ exit code: 1
╰─> [11 lines of output]
running bdist_wheel
running build
running build_py
creating build
creating build\lib.win-amd64-cpython-310
creating build\lib.win-amd64-cpython-310\cchardet
copying src\cchardet\version.py -> build\lib.win-amd64-cpython-310\cchardet
copying src\cchardet\__init__.py -> build\lib.win-amd64-cpython-310\cchardet
running build_ext
building 'cchardet._cchardet' extension
error: Microsoft Visual C++ 14.0 or greater is required. Get it with "Microsoft C++ Build Tools": https://visualstudio.microsoft.com/visual-cpp-build-tools/
[end of output]
```

Download and install `Microsoft C++ Build Tools` from [visual-cpp-build-tools](https://visualstudio.microsoft.com/visual-cpp-build-tools/).



##### Q5: `Torch not compiled with CUDA enabled`

```
2023-08-19 16:24:30 | ERROR | stderr | raise AssertionError("Torch not compiled with CUDA enabled")
2023-08-19 16:24:30 | ERROR | stderr | AssertionError: Torch not compiled with CUDA enabled
```

1. Install [CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit-archive)
2. Reinstall PyTorch [start-locally](https://pytorch.org/get-started/locally/#start-locally) with CUDA support.
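
After reinstalling, you can quickly verify that PyTorch sees your GPU:

```python
import torch

# Should print True and your GPU name once PyTorch is installed with CUDA support
print(torch.cuda.is_available())
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
```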


##### Q6: How to migrate the metadata tables `chat_history` and `connect_config` from DuckDB to SQLite
```commandline
python docker/examples/metadata/duckdb2sqlite.py
```

##### Q7: How to migrate the metadata tables `chat_history` and `connect_config` from DuckDB to MySQL

1. Update your MySQL username and password in `docker/examples/metadata/duckdb2mysql.py`.
2. Run the migration script:

```commandline
python docker/examples/metadata/duckdb2mysql.py
```
