-
Notifications
You must be signed in to change notification settings - Fork 106
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #17 from qhjqhj00/tommy-dev
Tommy dev
- Loading branch information
Showing
8 changed files
with
1,572 additions
and
40 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,7 @@ __pycache__/ | |
*.py[cod] | ||
*$py.class | ||
examples/data/* | ||
examples/cache | ||
# C extensions | ||
test.py | ||
*.so | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -672,4 +672,3 @@ If you use MemoRAG in your research, please cite our paper: | |
|
||
|
||
|
||
|
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,376 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"id": "4af76c03-6186-4a7c-b406-b911ac18c970", | ||
"metadata": {}, | ||
"source": [ | ||
"## Usage for Long LLMs as memory model" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "f36489cb-1d3d-42b5-8907-e1411921f619", | ||
"metadata": {}, | ||
"source": [ | ||
"### Chinese Case" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "4d042f9a-2fa5-4b8d-9024-23e470276747", | ||
"metadata": { | ||
"ExecutionIndicator": { | ||
"show": true | ||
}, | ||
"tags": [] | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"from memorag import MemoRAG\n", | ||
"model = MemoRAG(\n", | ||
" mem_model_name_or_path=\"shenzhi-wang/Llama3.1-8B-Chinese-Chat\", \n", | ||
" ret_model_name_or_path=\"BAAI/bge-m3\",\n", | ||
" cache_dir=\"path_to_model_cache\", # to specify local model cache directory (optional)\n", | ||
" access_token=\"hugging_face_access_token\" # to specify local model cache directory (optional)\n", | ||
" )" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"id": "ba166bfa-e2e4-48bb-8e45-a6396c6ab3c7", | ||
"metadata": { | ||
"ExecutionIndicator": { | ||
"show": true | ||
}, | ||
"execution": { | ||
"iopub.execute_input": "2024-09-13T08:05:59.524433Z", | ||
"iopub.status.busy": "2024-09-13T08:05:59.524121Z", | ||
"iopub.status.idle": "2024-09-13T08:05:59.530893Z", | ||
"shell.execute_reply": "2024-09-13T08:05:59.530364Z", | ||
"shell.execute_reply.started": "2024-09-13T08:05:59.524417Z" | ||
}, | ||
"tags": [] | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"The book 'Fortress Besieged' (围城) has 218414 characters.\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"context = open(\"fortress_besieged.txt\").read()\n", | ||
"print(f\"The book 'Fortress Besieged' (围城) has {len(context)} characters.\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"id": "e1c10019-99e5-433d-85ad-a035f7aa7aa1", | ||
"metadata": { | ||
"ExecutionIndicator": { | ||
"show": true | ||
}, | ||
"execution": { | ||
"iopub.execute_input": "2024-09-13T08:05:59.531830Z", | ||
"iopub.status.busy": "2024-09-13T08:05:59.531522Z", | ||
"iopub.status.idle": "2024-09-13T08:06:38.457038Z", | ||
"shell.execute_reply": "2024-09-13T08:06:38.456337Z", | ||
"shell.execute_reply.started": "2024-09-13T08:05:59.531815Z" | ||
}, | ||
"tags": [] | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Patched model for minference..\n" | ||
] | ||
}, | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"Loading checkpoint shards: 100%|██████████| 4/4 [00:09<00:00, 2.47s/it]\n" | ||
] | ||
}, | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Memory file size: 11.05 GB\n", | ||
"Encoded context length: 130754 tokens\n", | ||
"Number of chunks in retrieval corpus: 76\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"## The LLM supports 128K context length, so we truncate the book to around 128K\n", | ||
"## Here the context KV caches are not compressed, so the cache file is big\n", | ||
"model.memorize(context[:100000], \"besieged_cache\", print_stats=True)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"id": "b6374315-cfd7-4e3f-bf91-5e74953de6ff", | ||
"metadata": { | ||
"ExecutionIndicator": { | ||
"show": true | ||
}, | ||
"execution": { | ||
"iopub.execute_input": "2024-09-13T08:07:13.567706Z", | ||
"iopub.status.busy": "2024-09-13T08:07:13.567128Z", | ||
"iopub.status.idle": "2024-09-13T08:07:16.671936Z", | ||
"shell.execute_reply": "2024-09-13T08:07:16.671228Z", | ||
"shell.execute_reply.started": "2024-09-13T08:07:13.567682Z" | ||
}, | ||
"tags": [] | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Using Memory to generate the answer:\n", | ||
" 主人公是个名叫方鸿渐的留学归来的年轻人,他性格温文尔雅,但在感情上有些许纷争和误会,导致与苏小姐和唐小姐的关系出现波折。\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"query = \"主人公是个什么样的人?\"\n", | ||
"res = model(context, query, \"qa\", max_new_tokens=512)\n", | ||
"print(\"Using Memory to generate the answer:\\n\", res)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"id": "7b3b3421-57a6-485c-aedd-2634ee15e5f2", | ||
"metadata": { | ||
"ExecutionIndicator": { | ||
"show": false | ||
}, | ||
"execution": { | ||
"iopub.execute_input": "2024-09-13T08:07:07.845024Z", | ||
"iopub.status.busy": "2024-09-13T08:07:07.844167Z", | ||
"iopub.status.idle": "2024-09-13T08:07:07.848029Z", | ||
"shell.execute_reply": "2024-09-13T08:07:07.847461Z", | ||
"shell.execute_reply.started": "2024-09-13T08:07:07.844998Z" | ||
}, | ||
"tags": [] | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Using MemoRAG to generate the answer:\n", | ||
" 主人公方鸿渐是一个复杂多面的人物,他是故事中的叙事者,也是情感和心理的深度探索者。方鸿渐在故事中展现出对爱情的追求和对现实生活的无奈接受。他对苏小姐和唐晓芙的感情经历,反映出他对爱情的渴望和对现实中关系复杂性的感受。同时,他与辛楣的对话和互动,展示了他在情感和人际关系中的尴尬和挣扎。方鸿渐是一个有着深层情感和心理活动的普通人,通过他的经历和感受,读者可以了解到他在现代中国社会背景下的生活和心态。\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"query = \"主人公是个什么样的人?\"\n", | ||
"res = model(context, query, \"memorag\", max_new_tokens=512)\n", | ||
"print(\"Using MemoRAG to generate the answer:\\n\", res)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "a19f0a04-60ab-4c29-8fc1-ef1bbc904f9b", | ||
"metadata": { | ||
"ExecutionIndicator": { | ||
"show": false | ||
}, | ||
"execution": { | ||
"iopub.execute_input": "2024-09-13T03:07:54.881700Z", | ||
"iopub.status.busy": "2024-09-13T03:07:54.881126Z", | ||
"iopub.status.idle": "2024-09-13T03:07:54.886486Z", | ||
"shell.execute_reply": "2024-09-13T03:07:54.885904Z", | ||
"shell.execute_reply.started": "2024-09-13T03:07:54.881677Z" | ||
}, | ||
"tags": [] | ||
}, | ||
"source": [ | ||
"### English Case" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "94f45a06-a3b1-4622-b79f-57d02fc2b321", | ||
"metadata": { | ||
"tags": [] | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"from memorag import MemoRAG\n", | ||
"model = MemoRAG(\n", | ||
" mem_model_name_or_path=\"meta-llama/Meta-Llama-3.1-8B-Instruct\", \n", | ||
" ret_model_name_or_path=\"BAAI/bge-m3\",\n", | ||
" cache_dir=\"path_to_model_cache\", # to specify local model cache directory (optional)\n", | ||
" access_token=\"hugging_face_access_token\" # to specify local model cache directory (optional)\n", | ||
" )" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"id": "ba7525e2-8aa7-4875-bdc3-cff7d5485990", | ||
"metadata": { | ||
"ExecutionIndicator": { | ||
"show": true | ||
}, | ||
"execution": { | ||
"iopub.execute_input": "2024-09-13T08:01:45.843964Z", | ||
"iopub.status.busy": "2024-09-13T08:01:45.843548Z", | ||
"iopub.status.idle": "2024-09-13T08:02:35.907509Z", | ||
"shell.execute_reply": "2024-09-13T08:02:35.906741Z", | ||
"shell.execute_reply.started": "2024-09-13T08:01:45.843945Z" | ||
}, | ||
"tags": [] | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Patched model for minference..\n" | ||
] | ||
}, | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"Loading checkpoint shards: 100%|██████████| 4/4 [00:10<00:00, 2.61s/it]\n" | ||
] | ||
}, | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Memory file size: 14.98 GB\n", | ||
"Encoded context length: 122591 tokens\n", | ||
"Number of chunks in retrieval corpus: 268\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"context = open(\"harry_potter.txt\").read()\n", | ||
"model.memorize(context, \"harry_potter\", print_stats=True)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"id": "57f2d57b-7d71-4a86-9dc7-939289850096", | ||
"metadata": { | ||
"execution": { | ||
"iopub.execute_input": "2024-09-13T08:05:22.420672Z", | ||
"iopub.status.busy": "2024-09-13T08:05:22.420143Z", | ||
"iopub.status.idle": "2024-09-13T08:05:26.013620Z", | ||
"shell.execute_reply": "2024-09-13T08:05:26.012908Z", | ||
"shell.execute_reply.started": "2024-09-13T08:05:22.420648Z" | ||
}, | ||
"tags": [] | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Using Memory to generate the answer:\n", | ||
" Harry, Ron, and Hermione are the main characters in the story, and their relationships with each other are central to the plot. They are best friends, and their friendship is a source of comfort and support for each other.\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"query = \"How are the mutual relationships of the main roles?\"\n", | ||
"res = model(context, query, \"qa\", max_new_tokens=512)\n", | ||
"print(\"Using Memory to generate the answer:\\n\", res)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"id": "7a89d834-0b42-4a01-90cb-4f6b09fcfb48", | ||
"metadata": { | ||
"ExecutionIndicator": { | ||
"show": true | ||
}, | ||
"execution": { | ||
"iopub.execute_input": "2024-09-13T08:02:41.440516Z", | ||
"iopub.status.busy": "2024-09-13T08:02:41.440091Z", | ||
"iopub.status.idle": "2024-09-13T08:03:08.828958Z", | ||
"shell.execute_reply": "2024-09-13T08:03:08.828415Z", | ||
"shell.execute_reply.started": "2024-09-13T08:02:41.440497Z" | ||
}, | ||
"tags": [] | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Using MemoRAG to generate the answer:\n", | ||
" The main characters in the story have the following relationships:\n", | ||
"\n", | ||
"* Harry Potter and Ron Weasley: Best friends and partners in their adventures.\n", | ||
"* Harry Potter and Hermione Granger: Close friends and study partners.\n", | ||
"* Ron Weasley and Hermione Granger: Friends and study partners.\n", | ||
"* Ginny Weasley and Harry Potter: Cousin and friend.\n", | ||
"* Percy Weasley and Harry Potter: Cousin and somewhat distant.\n", | ||
"* Draco Malfoy and Harry Potter: Arch-nemesis and rival.\n", | ||
"* Professor Snape and Harry Potter: Teacher and student, with a complex and often antagonistic relationship.\n", | ||
"* Professor Dumbledore and Harry Potter: Mentor and friend.\n", | ||
"* Hagrid and Harry Potter: Friend and mentor.\n", | ||
"* The Weasley family: A close and loving family, with Harry being a part of it.\n", | ||
"\n", | ||
"These relationships are central to the story and drive the plot forward.\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"query = \"How are the mutual relationships of the main roles?\"\n", | ||
"res = model(context, query, \"memorag\", max_new_tokens=512)\n", | ||
"print(\"Using MemoRAG to generate the answer:\\n\", res)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "1496419d-c63a-4584-94c9-8fd45350b188", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.10.13" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
Oops, something went wrong.