From 511242280b094d65db7fe9b75969c5cb5fce00be Mon Sep 17 00:00:00 2001 From: Isaac Francisco <78627776+isahers1@users.noreply.github.com> Date: Tue, 30 Jul 2024 10:38:04 -0700 Subject: [PATCH] [docs]: standardize vectorstores (#24797) --- .../docs/vectorstores.ipynb | 298 +++++++++++++++++- .../integration_template/vectorstores.py | 102 +++++- .../langchain_cli/namespaces/integration.py | 8 +- 3 files changed, 388 insertions(+), 20 deletions(-) diff --git a/libs/cli/langchain_cli/integration_template/docs/vectorstores.ipynb b/libs/cli/langchain_cli/integration_template/docs/vectorstores.ipynb index 5bd7c293fd561..d281fe4c3ed95 100644 --- a/libs/cli/langchain_cli/integration_template/docs/vectorstores.ipynb +++ b/libs/cli/langchain_cli/integration_template/docs/vectorstores.ipynb @@ -17,35 +17,88 @@ "source": [ "# __ModuleName__VectorStore\n", "\n", - "This notebook covers how to get started with the __ModuleName__ vector store.\n", + "This notebook covers how to get started with the __ModuleName__ vector store." + ] + }, + { + "cell_type": "markdown", + "id": "36fdc060", + "metadata": {}, + "source": [ + "## Setup\n", + "\n", + "- TODO: Update with relevant info.\n", + "- TODO: Update minimum version to be correct.\n", + "\n", + "To access __ModuleName__ vector stores you'll need to create a/an __ModuleName__ account, get an API key, and install the `__package_name__` integration package." + ] + }, + { + "cell_type": "raw", + "id": "64e28aa6", + "metadata": { + "vscode": { + "languageId": "raw" + } + }, + "source": [ + "%pip install -qU \"__package_name__>=MINIMUM_VERSION\"" + ] + }, + { + "cell_type": "markdown", + "id": "9695dee7", + "metadata": {}, + "source": [ + "### Credentials\n", + "\n", + "- TODO: Update with relevant info.\n", "\n", - "## Installation" + "Head to (TODO: link) to sign up to __ModuleName__ and generate an API key. 
Once you've done this set the __MODULE_NAME___API_KEY environment variable:" ] }, { "cell_type": "code", "execution_count": null, - "id": "d97b55c2", + "id": "894c30e4", "metadata": {}, "outputs": [], "source": [ - "# install package\n", - "!pip install -U __package_name__" + "import getpass\n", + "import os\n", + "\n", + "import os\n", + "\n", + "if not os.getenv(\"__MODULE_NAME___API_KEY\"):\n", + " import getpass\n", + " os.environ[\"__MODULE_NAME___API_KEY\"] = getpass.getpass(\"Enter your __ModuleName__ API key: \")" ] }, { "cell_type": "markdown", - "id": "36fdc060", "metadata": {}, "source": [ - "## Environment Setup\n", - "\n", - "Make sure to set the following environment variables:\n", - "\n", - "- TODO: fill out relevant environment variables or secrets\n", - "- Op\n", + "If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n", + "# os.environ[\"LANGSMITH_TRACING\"] = \"true\"" + ] + }, + { + "cell_type": "markdown", + "id": "93df377e", + "metadata": {}, + "source": [ + "## Instantiation\n", "\n", - "## Usage" + "- TODO: Fill out with relevant init params" ] }, { @@ -59,7 +112,224 @@ "source": [ "from __module_name__.vectorstores import __ModuleName__VectorStore\n", "\n", - "# TODO: switch for preferred way to init and use your vector store\n" + "vector_store = __ModuleName__VectorStore()" + ] + }, + { + "cell_type": "markdown", + "id": "ac6071d4", + "metadata": {}, + "source": [ + "## Manage vector store\n", + "\n", + "### Add items to vector store\n", + "\n", + "- TODO: Edit and then run code cell to generate output" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "17f5efc0", + "metadata": {}, + "outputs": [], + "source": [ + 
"from langchain_core.documents import Document\n", + "\n", + "document_1 = Document(\n", + " page_content=\"foo\",\n", + " metadata={\"source\": \"https://example.com\"}\n", + ")\n", + "\n", + "document_2 = Document(\n", + " page_content=\"bar\",\n", + " metadata={\"source\": \"https://example.com\"}\n", + ")\n", + "\n", + "document_2 = Document(\n", + " page_content=\"baz\",\n", + " metadata={\"source\": \"https://example.com\"}\n", + ")\n", + "\n", + "documents = [document_1, document_2]\n", + "\n", + "vector_store.add_documents(documents=documents,ids=[\"1\",\"2\"])" + ] + }, + { + "cell_type": "markdown", + "id": "c738c3e0", + "metadata": {}, + "source": [ + "### Update items in vector store\n", + "\n", + "- TODO: Edit and then run code cell to generate output" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0aa8b71", + "metadata": {}, + "outputs": [], + "source": [ + "updated_document = Document(\n", + " page_content=\"qux\",\n", + " metadata={\"source\": \"https://another-example.com\"}\n", + ")\n", + "\n", + "vector_store.update_documents(document_id=\"1\",document=updated_document)" + ] + }, + { + "cell_type": "markdown", + "id": "dcf1b905", + "metadata": {}, + "source": [ + "### Delete items from vector store\n", + "\n", + "- TODO: Edit and then run code cell to generate output" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ef61e188", + "metadata": {}, + "outputs": [], + "source": [ + "vector_store.delete(ids=[\"3\"])" + ] + }, + { + "cell_type": "markdown", + "id": "c3620501", + "metadata": {}, + "source": [ + "## Query vector store\n", + "\n", + "Once your vector store has been created and the relevant documents have been added you will most likely wish to query it during the running of your chain or agent. 
\n", + "\n", + "### Query directly\n", + "\n", + "Performing a simple similarity search can be done as follows:\n", + "\n", + "- TODO: Edit and then run code cell to generate output" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aa0a16fa", + "metadata": {}, + "outputs": [], + "source": [ + "results = vector_store.similarity_search(query=\"thud\",k=1,filter={\"source\":\"https://example.com\"})\n", + "for doc in results:\n", + " print(f\"* {doc.page_content} [{doc.metadata}]\")" + ] + }, + { + "cell_type": "markdown", + "id": "3ed9d733", + "metadata": {}, + "source": [ + "If you want to execute a similarity search and receive the corresponding scores you can run:\n", + "\n", + "- TODO: Edit and then run code cell to generate output" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5efd2eaa", + "metadata": {}, + "outputs": [], + "source": [ + "results = vector_store.similarity_search_with_score(query=\"thud\",k=1,filter={\"source\":\"https://example.com\"})\n", + "for doc, score in results:\n", + " print(f\"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]\")" + ] + }, + { + "cell_type": "markdown", + "id": "0c235cdc", + "metadata": {}, + "source": [ + "### Query by turning into retriever\n", + "\n", + "You can also transform the vector store into a retriever for easier usage in your chains. 
\n", + "\n", + "- TODO: Edit and then run code cell to generate output" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f3460093", + "metadata": {}, + "outputs": [], + "source": [ + "retriever = vector_store.as_retriever()\n", + "retriever.invoke(\"thud\")" + ] + }, + { + "cell_type": "markdown", + "id": "901c75dc", + "metadata": {}, + "source": [ + "Using retriever in a simple RAG chain:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "619b5ef6", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_openai import ChatOpenAI\n", + "from langchain import hub\n", + "from langchain_core.output_parsers import StrOutputParser\n", + "from langchain_core.runnables import RunnablePassthrough\n", + "\n", + "\n", + "llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\")\n", + "\n", + "prompt = hub.pull(\"rlm/rag-prompt\")\n", + "\n", + "def format_docs(docs):\n", + " return \"\\n\\n\".join(doc.page_content for doc in docs)\n", + "\n", + "rag_chain = (\n", + " {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n", + " | prompt\n", + " | llm\n", + " | StrOutputParser()\n", + ")\n", + "\n", + "rag_chain.invoke(\"thud\")" + ] + }, + { + "cell_type": "markdown", + "id": "069f1b5f", + "metadata": {}, + "source": [ + "## TODO: Any functionality specific to this vector store\n", + "\n", + "E.g. creating a persistent database to save to your disk, etc." 
+ ] + }, + { + "cell_type": "markdown", + "id": "8a27244f", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all __ModuleName__VectorStore features and configurations head to the API reference: https://api.python.langchain.com/en/latest/vectorstores/__module_name__.vectorstores.__ModuleName__VectorStore.html" ] } ], diff --git a/libs/cli/langchain_cli/integration_template/integration_template/vectorstores.py b/libs/cli/langchain_cli/integration_template/integration_template/vectorstores.py index a7614322b9e44..db74b79e7d387 100644 --- a/libs/cli/langchain_cli/integration_template/integration_template/vectorstores.py +++ b/libs/cli/langchain_cli/integration_template/integration_template/vectorstores.py @@ -26,15 +26,109 @@ class __ModuleName__VectorStore(VectorStore): - """__ModuleName__ vector store. + # TODO: Replace all TODOs in docstring. + """__ModuleName__ vector store integration. - Example: + # TODO: Replace with relevant packages, env vars. + Setup: + Install ``__package_name__`` and set environment variable ``__MODULE_NAME___API_KEY``. + + .. code-block:: bash + + pip install -U __package_name__ + export __MODULE_NAME___API_KEY="your-api-key" + + # TODO: Populate with relevant params. + Key init args — indexing params: + collection_name: str + Name of the collection. + embedding_function: Embeddings + Embedding function to use. + + # TODO: Populate with relevant params. + Key init args — client params: + client: Optional[Client] + Client to use. + connection_args: Optional[dict] + Connection arguments. + + # TODO: Replace with relevant init params. + Instantiate: .. code-block:: python from __module_name__.vectorstores import __ModuleName__VectorStore - vectorstore = __ModuleName__VectorStore() - """ + vector_store = __ModuleName__VectorStore( + collection_name="foo", + embedding_function=OpenAIEmbeddings(), + connection_args={"uri": "./foo.db"}, + # other params... 
+ ) + + # TODO: Populate with relevant variables. + Add Documents: + .. code-block:: python + + from langchain_core.documents import Document + + document = Document(page_content="foo", metadata={"baz": "bar"}) + vector_store.add_documents([document],ids=["1"]) + + # TODO: Populate with relevant variables. + Update Documents: + .. code-block:: python + + updated_document = Document( + page_content="qux", + metadata={"bar": "baz"} + ) + + vector_store.update_documents(document_id="1",document=updated_document) + + Delete Documents: + .. code-block:: python + + vector_store.delete(ids=["1"]) + + # TODO: Fill out with relevant variables and example output. + Search: + .. code-block:: python + + results = vector_store.similarity_search(query="thud",k=1) + for doc in results: + print(f"* {doc.page_content} [{doc.metadata}]") + + .. code-block:: python + + # TODO: Example output + + # TODO: Fill out with relevant variables and example output. + Search with score: + .. code-block:: python + + results = vector_store.similarity_search_with_score(query="thud",k=1) + for doc, score in results: + print(f"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]") + + .. code-block:: python + + # TODO: Example output + + # TODO: Fill out with relevant variables and example output. + Use as Retriever: + .. code-block:: python + + retriever = vector_store.as_retriever( + search_type="mmr", + search_kwargs={"k": 1, "fetch_k": 10, "lambda_mult": 0.5}, + ) + retriever.invoke("thud") + + .. code-block:: python + + # TODO: Example output + + """ # noqa: E501 def add_texts( self, diff --git a/libs/cli/langchain_cli/namespaces/integration.py b/libs/cli/langchain_cli/namespaces/integration.py index 51fdde06f569c..522eab2d0ca01 100644 --- a/libs/cli/langchain_cli/namespaces/integration.py +++ b/libs/cli/langchain_cli/namespaces/integration.py @@ -155,8 +155,8 @@ def create_doc( str, typer.Option( help=( - "The type of component. Currently only 'ChatModel', 'DocumentLoader' " - "supported." 
+ "The type of component. Currently only 'ChatModel', " + "'DocumentLoader', 'VectorStore' supported." ), ), ] = "ChatModel", @@ -209,6 +209,10 @@ def create_doc( Path(__file__).parents[1] / "integration_template/docs/document_loaders.ipynb" ) + elif component_type == "VectorStore": + docs_template = ( + Path(__file__).parents[1] / "integration_template/docs/vectorstores.ipynb" + ) shutil.copy(docs_template, destination_path) # replacements in file