This repository has been archived by the owner on Oct 25, 2024. It is now read-only.

[NeuralChat] Add langchain extension example and update notebook (#1237)
* Add langchain extension example and update notebook

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
lvliang-intel authored Feb 2, 2024
1 parent 7733d44 commit d40e2f1
Showing 3 changed files with 190 additions and 7 deletions.
@@ -4,7 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Intel Extension for Transformers provides a comprehensive suite of Langchain-based extension APIs, including advanced retrievers, embedding models, and vector stores. These enhancements are carefully crafted to expand the capabilities of the original langchain API, ultimately boosting overall performance. This extension is specifically tailored to enhance the functionality and performance of RAG."
"Intel Extension for Transformers provides a comprehensive suite of Langchain-based extension APIs, including advanced retrievers, embedding models, and vector stores. These enhancements are carefully crafted to expand the capabilities of the original Langchain API, ultimately boosting overall performance. This extension is specifically tailored to enhance the functionality and performance of RAG."
]
},
{
@@ -69,7 +69,14 @@
"metadata": {},
"outputs": [],
"source": [
"!curl -OL https://d1io3yog0oux5.cloudfront.net/_897efe2d574a132883f198f2b119aa39/intel/db/888/8941/file/412439%281%29_12_Intel_AR_WR.pdf"
"!curl -o Intel_AR_WR.pdf https://d1io3yog0oux5.cloudfront.net/_897efe2d574a132883f198f2b119aa39/intel/db/888/8941/file/412439%281%29_12_Intel_AR_WR.pdf"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Chatbot code with Langchain APIs:"
]
},
{
@@ -79,32 +86,79 @@
"outputs": [],
"source": [
"from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline\n",
"from langchain_community.document_loaders import PyPDFLoader\n",
"from langchain.chains import RetrievalQA\n",
"from langchain_core.vectorstores import VectorStoreRetriever\n",
"from langchain_core.documents import Document\n",
"from langchain_community.embeddings import HuggingFaceBgeEmbeddings\n",
"from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline\n",
"from langchain.vectorstores import Chroma\n",
"\n",
"loader = PyPDFLoader(\"./Intel_AR_WR.pdf\")\n",
"langchain_documents = loader.load_and_split()\n",
"embeddings = HuggingFaceBgeEmbeddings(model_name=\"BAAI/bge-base-en-v1.5\")\n",
"knowledge_base = Chroma.from_documents(documents=langchain_documents, embedding=embeddings, persist_directory='./out')\n",
"tokenizer = AutoTokenizer.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\")\n",
"model = AutoModelForCausalLM.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\")\n",
"pipe = HuggingFacePipeline(pipeline=pipeline(\"text-generation\", model=model, tokenizer=tokenizer, max_new_tokens=128))\n",
"retriever = VectorStoreRetriever(vectorstore=knowledge_base, search_type='mmr', search_kwargs={'k':1, 'fetch_k':5})\n",
"retrievalQA = RetrievalQA.from_llm(llm=pipe, retriever=retriever)\n",
"result = retrievalQA({\"query\": \"What is IDM 2.0?\"})\n",
"print(result)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Chatbot code with ITREX Langchain extension APIs:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline\n",
"from langchain.chains import RetrievalQA\n",
"from langchain_core.vectorstores import VectorStoreRetriever\n",
"from langchain_core.documents import Document\n",
"from intel_extension_for_transformers.langchain.embeddings import HuggingFaceBgeEmbeddings\n",
"from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline\n",
"from intel_extension_for_transformers.langchain.vectorstores import Chroma\n",
"from intel_extension_for_transformers.neural_chat.pipeline.plugins.retrieval.parser.parser import DocumentParser\n",
"\n",
"document_parser = DocumentParser()\n",
"input_path=\"./412439%281%29_12_Intel_AR_WR.pdf\"\n",
"input_path=\"./Intel_AR_WR.pdf\"\n",
"data_collection=document_parser.load(input=input_path)\n",
"documents = []\n",
"for data, meta in data_collection:\n",
" doc = Document(page_content=data, metadata={\"source\":meta})\n",
" documents.append(doc)\n",
"embeddings = HuggingFaceBgeEmbeddings(model_name=\"BAAI/bge-base-en-v1.5\")\n",
"# load Intel/bge-base-en-v1.5-sts-int8-static from local\n",
"embeddings = HuggingFaceBgeEmbeddings(model_name=\"./bge-base-en-v1.5-sts-int8-static\")\n",
"knowledge_base = Chroma.from_documents(documents=documents, embedding=embeddings, persist_directory='./output')\n",
"tokenizer = AutoTokenizer.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\")\n",
"model = AutoModelForCausalLM.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\")\n",
"pipe = HuggingFacePipeline(pipeline=pipeline(\"text-generation\", model=model, tokenizer=tokenizer, max_new_tokens=128))\n",
"retriever = VectorStoreRetriever(vectorstore=knowledge_base)\n",
"retriever = VectorStoreRetriever(vectorstore=knowledge_base, search_type='mmr', search_kwargs={'k':1, 'fetch_k':5})\n",
"retrievalQA = RetrievalQA.from_llm(llm=pipe, retriever=retriever)\n",
"result = retrievalQA({\"query\": \"What is IDM 2.0?\"})\n",
"print(result)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Comparing the execution time, using ITREX Langchain extension APIs can get better performance.\n",
"\n",
"| APIs | Execution Time |\n",
"|-------|-------|\n",
"| Langchain | 106.094 sec |\n",
"| ITREX Langchain Extension | 81.429 sec |\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -127,7 +181,7 @@
"\n",
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=512)\n",
"document_parser = DocumentParser()\n",
"input_path=\"./412439%281%29_12_Intel_AR_WR.pdf\"\n",
"input_path=\"./Intel_AR_WR.pdf\"\n",
"data_collection=document_parser.load(input=input_path)\n",
"langchain_documents = document_transfer(data_collection)\n",
"child_documents = text_splitter.split_documents(langchain_documents)\n",
@@ -139,6 +193,23 @@
"docs=retriever.get_relevant_documents(\"What is IDM 2.0?\")\n",
"print(docs)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Comparing with result using default Langchain retriever, ITREX Langchain extension APIs can get better result.\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"| Retrieval Type | Retrieval Result |\n",
"|-------|-------|\n",
"| default | The Smart Capital strategy helps the company leverage various sources of capital to support investments in manufacturing capacity and fund their IDM 2.0 strategy. |\n",
"| ITREX Langchain Extension | Smart Capital for IDM 2.0 includes aggressive building out of manufacturing shell space, which gives flexibility in how and when to bring additional capacity online based on milestone triggers such as product readiness, market conditions, and customer commitments. It also involves government incentives, Strategic Capacity Investments (SCIP), customer commitments, and external foundries. |"
]
}
],
"metadata": {
@@ -0,0 +1,62 @@
# Introduction

Intel Extension for Transformers provides a comprehensive suite of Langchain-based extension APIs, including advanced retrievers, embedding models, and vector stores. These enhancements are carefully crafted to expand the capabilities of the original Langchain API, ultimately boosting overall performance. This extension is specifically tailored to enhance the functionality and performance of retrieval-augmented generation (RAG).


We have introduced enhanced vector store operations, allowing users to adjust and fine-tune their settings even after the chatbot has been initialized, providing a more adaptable and user-friendly experience. For Langchain users, integrating the optimized vector store is as simple as replacing the original Chroma API in Langchain, as sketched below.
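
A minimal sketch of the swap, based on the notebook updated in this commit; `documents` and `embeddings` are assumed to have been prepared as shown there:

```python
# Original Langchain import:
#   from langchain.vectorstores import Chroma
# ITREX drop-in replacement; the rest of the RAG pipeline is unchanged:
from intel_extension_for_transformers.langchain.vectorstores import Chroma

# `documents` is a list of langchain_core.documents.Document and `embeddings`
# is any Langchain embeddings object, prepared as in the notebook.
knowledge_base = Chroma.from_documents(
    documents=documents,
    embedding=embeddings,
    persist_directory="./output",
)
```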

We offer optimized retrievers such as `VectorStoreRetriever` and `ChildParentRetriever` to handle vector store operations efficiently, ensuring optimal retrieval performance. Additionally, we provide quantized embedding models to accelerate document embedding; a sketch of loading one follows. These Langchain extension APIs are easy to use and are optimized for both performance and accuracy, specifically tailored for Intel hardware.
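
A rough sketch of the quantized embedding path, assuming the Intel/bge-base-en-v1.5-sts-int8-static model has already been downloaded to a local directory (the path below is illustrative):

```python
from intel_extension_for_transformers.langchain.embeddings import HuggingFaceBgeEmbeddings

# Assumption: the quantized Intel/bge-base-en-v1.5-sts-int8-static model has
# been fetched to this local directory (e.g. with git-lfs) beforehand.
embeddings = HuggingFaceBgeEmbeddings(model_name="./bge-base-en-v1.5-sts-int8-static")

# Standard Langchain embeddings interface: embed a batch of document texts.
vectors = embeddings.embed_documents(["Intel Extension for Transformers accelerates RAG."])
```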

# Setup Environment

## Setup Conda

First, you need to install and configure the Conda environment:

```shell
# Download and install Miniconda
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
bash Miniconda*.sh
source ~/.bashrc
```

## Install numactl

Next, install the numactl library:

```shell
sudo apt install numactl
```

## Install Intel Extension for Transformers

```shell
pip install intel-extension-for-transformers
```

## Install Python dependencies

Install the following Python dependencies using Conda:

```shell
conda install astunparse ninja pyyaml mkl mkl-include setuptools cmake cffi typing_extensions future six requests dataclasses -y
conda install jemalloc gperftools -c conda-forge -y
conda install git-lfs -y
```

Install other dependencies using pip:

```bash
pip install -r ../../requirements.txt
```

Install retrieval plugin dependencies using pip:
```bash
pip install -r ../../pipeline/plugins/retrieval/requirements.txt
```

# Test

```shell
python main.py
```
@@ -0,0 +1,50 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2023 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain_core.vectorstores import VectorStoreRetriever
from langchain_core.documents import Document
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from intel_extension_for_transformers.langchain.vectorstores import Chroma
from intel_extension_for_transformers.neural_chat.pipeline.plugins.retrieval.parser.parser import DocumentParser
import requests

url = "https://d1io3yog0oux5.cloudfront.net/_897efe2d574a132883f198f2b119aa39/intel/db/888/8941/file/412439%281%29_12_Intel_AR_WR.pdf"
filename = "Intel_AR_WR.pdf"
response = requests.get(url)
with open(filename, 'wb') as file:
file.write(response.content)
print(f"File '{filename}' downloaded successfully.")

# Parse the PDF into (text, metadata) pairs and wrap each one as a Langchain Document.
document_parser = DocumentParser()
input_path = "./Intel_AR_WR.pdf"
data_collection = document_parser.load(input=input_path)
documents = []
for data, meta in data_collection:
    doc = Document(page_content=data, metadata={"source": meta})
    documents.append(doc)
# Build the Chroma knowledge base with BGE embeddings, then serve a Llama-2
# chat model behind a RetrievalQA chain.
embeddings = HuggingFaceBgeEmbeddings(model_name="BAAI/bge-base-en-v1.5")
knowledge_base = Chroma.from_documents(documents=documents, embedding=embeddings, persist_directory='./output')
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
pipe = HuggingFacePipeline(pipeline=pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=128))
retriever = VectorStoreRetriever(vectorstore=knowledge_base)
retrievalQA = RetrievalQA.from_llm(llm=pipe, retriever=retriever)
result = retrievalQA({"query": "What is IDM 2.0?"})
print(result)
