diff --git a/docs/docs/integrations/vectorstores/hanavector.ipynb b/docs/docs/integrations/vectorstores/hanavector.ipynb new file mode 100644 index 0000000000000..0cc9f61d31d11 --- /dev/null +++ b/docs/docs/integrations/vectorstores/hanavector.ipynb @@ -0,0 +1,703 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# SAP HANA Cloud Vector Engine\n", + "\n", + ">SAP HANA Cloud Vector Engine is a vector store fully integrated into the SAP HANA Cloud database." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Installation of the HANA database driver." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Pip install necessary package\n", + "%pip install --upgrade --quiet hdbcli" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To use `OpenAIEmbeddings` so we use the OpenAI API Key." + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "ExecuteTime": { + "end_time": "2023-09-09T08:02:16.802456Z", + "start_time": "2023-09-09T08:02:07.065604Z" + } + }, + "outputs": [], + "source": [ + "import os\n", + "# Use OPENAI_API_KEY env variable\n", + "# os.environ[\"OPENAI_API_KEY\"] = \"Your OpenAI API key\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Load the sample document \"state_of_the_union.txt\" and create chunks from it." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2023-09-09T08:02:25.452472Z", + "start_time": "2023-09-09T08:02:25.441563Z" + } + }, + "outputs": [], + "source": [ + "from langchain.docstore.document import Document\n", + "from langchain.text_splitter import CharacterTextSplitter\n", + "from langchain_community.document_loaders import TextLoader\n", + "from langchain_community.vectorstores.hanavector import HanaDB\n", + "from langchain_openai import OpenAIEmbeddings\n", + "\n", + "text_documents = TextLoader(\"../../modules/state_of_the_union.txt\").load()\n", + "text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n", + "text_chunks = text_splitter.split_documents(text_documents)\n", + "print(f\"Number of document chunks: {len(text_chunks)}\")\n", + "\n", + "embeddings = OpenAIEmbeddings()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a database connection to a HANA Cloud instance" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "ExecuteTime": { + "end_time": "2023-09-09T08:02:28.174088Z", + "start_time": "2023-09-09T08:02:28.162698Z" + } + }, + "outputs": [], + "source": [ + "from hdbcli import dbapi\n", + "\n", + "# Use connection settings from the environment\n", + "connection = dbapi.connect(\n", + " address=os.environ.get(\"HANA_DB_ADDRESS\"),\n", + " port=os.environ.get(\"HANA_DB_PORT\"),\n", + " user=os.environ.get(\"HANA_DB_USER\"),\n", + " password=os.environ.get(\"HANA_DB_PASSWORD\"),\n", + " autocommit=True,\n", + " sslValidateCertificate=False,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a LangChain VectorStore interface for the HANA database and specify the table (collection) to use for accessing the vector embeddings" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "ExecuteTime": { + "end_time": "2023-09-09T08:04:16.696625Z", + "start_time": "2023-09-09T08:02:31.817790Z" + } + }, + "outputs": [], + "source": [ + "db = 
HanaDB(\n", + " embedding=embeddings, connection=connection, table_name=\"STATE_OF_THE_UNION\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Add the loaded document chunks to the table. For this example, we delete any previous content from the table which might exist from previous runs." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Delete already existing documents from the table\n", + "db.delete(filter={})\n", + "\n", + "# add the loaded document chunks\n", + "db.add_documents(text_chunks)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Perform a query to get the two best matching document chunks from the ones that we added in the previous step.\n", + "By default \"Cosine Similarity\" is used for the search." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "query = \"What did the president say about Ketanji Brown Jackson\"\n", + "docs = db.similarity_search(query, k=2)\n", + "\n", + "for doc in docs:\n", + " print(\"-\" * 80)\n", + " print(doc.page_content)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Query the same content with \"Euclidian Distance\". The results shoud be the same as with \"Cosine Similarity\"." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.vectorstores.utils import DistanceStrategy\n", + "\n", + "db = HanaDB(\n", + " embedding=embeddings,\n", + " connection=connection,\n", + " distance_strategy=DistanceStrategy.EUCLIDEAN_DISTANCE,\n", + " table_name=\"STATE_OF_THE_UNION\",\n", + ")\n", + "\n", + "query = \"What did the president say about Ketanji Brown Jackson\"\n", + "docs = db.similarity_search(query, k=2)\n", + "for doc in docs:\n", + " print(\"-\" * 80)\n", + " print(doc.page_content)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "Maximal Marginal Relevance Search (MMR)\n", + "\n", + "Maximal marginal relevance optimizes for similarity to query AND diversity among selected documents. First 20 (fetch_k) items will be retrieved from the DB. The MMR algorithm will then find the best 2 (k) matches." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2023-09-09T08:05:23.276819Z", + "start_time": "2023-09-09T08:05:21.972256Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "docs = db.max_marginal_relevance_search(query, k=2, fetch_k=20)\n", + "for doc in docs:\n", + " print(\"-\" * 80)\n", + " print(doc.page_content)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Basic Vectorstore Operations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "db = HanaDB(\n", + " connection=connection, embedding=embeddings, table_name=\"LANGCHAIN_DEMO_BASIC\"\n", + ")\n", + "\n", + "# Delete already existing documents from the table\n", + "db.delete(filter={})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can add simple text documents to the existing table." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "docs = [Document(page_content=\"Some text\"), Document(page_content=\"Other docs\")]\n", + "db.add_documents(docs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Add documents with metadata." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "docs = [\n", + " Document(\n", + " page_content=\"foo\",\n", + " metadata={\"start\": 100, \"end\": 150, \"doc_name\": \"foo.txt\", \"quality\": \"bad\"},\n", + " ),\n", + " Document(\n", + " page_content=\"bar\",\n", + " metadata={\"start\": 200, \"end\": 250, \"doc_name\": \"bar.txt\", \"quality\": \"good\"},\n", + " ),\n", + "]\n", + "db.add_documents(docs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Query documents with specific metadata." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "docs = db.similarity_search(\"foobar\", k=2, filter={\"quality\": \"bad\"})\n", + "# With filtering on \"quality\"==\"bad\", only one document should be returned\n", + "for doc in docs:\n", + " print(\"-\" * 80)\n", + " print(doc.page_content)\n", + " print(doc.metadata)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Delete documents with specific metadata." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "db.delete(filter={\"quality\": \"bad\"})\n", + "\n", + "# Now the similarity search with the same filter will return no results\n", + "docs = db.similarity_search(\"foobar\", k=2, filter={\"quality\": \"bad\"})\n", + "print(len(docs))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using a VectorStore as a retriever in chains for retrieval augmented generation (RAG)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.memory import ConversationBufferMemory\n", + "from langchain_openai import ChatOpenAI\n", + "\n", + "# Access the vector DB with a new table\n", + "db = HanaDB(\n", + " connection=connection,\n", + " embedding=embeddings,\n", + " table_name=\"LANGCHAIN_DEMO_RETRIEVAL_CHAIN\",\n", + ")\n", + "\n", + "# Delete already existing entries from the table\n", + "db.delete(filter={})\n", + "\n", + "# add the loaded document chunks from the \"State Of The Union\" file\n", + "db.add_documents(text_chunks)\n", + "\n", + "# Create a retriever instance of the vector store\n", + "retriever = db.as_retriever()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Define the prompt." + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.prompts import PromptTemplate\n", + "\n", + "prompt_template = \"\"\"\n", + "You are an expert in state of the union topics. 
You are provided multiple context items that are related to the prompt you have to answer.\n", + "Use the following pieces of context to answer the question at the end.\n", + "\n", + "```\n", + "{context}\n", + "```\n", + "\n", + "Question: {question}\n", + "\"\"\"\n", + "\n", + "PROMPT = PromptTemplate(\n", + " template=prompt_template, input_variables=[\"context\", \"question\"]\n", + ")\n", + "chain_type_kwargs = {\"prompt\": PROMPT}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create the ConversationalRetrievalChain, which handles the chat history and the retrieval of similar document chunks to be added to the prompt." + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chains import ConversationalRetrievalChain\n", + "\n", + "llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\")\n", + "memory = ConversationBufferMemory(\n", + " memory_key=\"chat_history\", output_key=\"answer\", return_messages=True\n", + ")\n", + "qa_chain = ConversationalRetrievalChain.from_llm(\n", + " llm,\n", + " db.as_retriever(search_kwargs={\"k\": 5}),\n", + " return_source_documents=True,\n", + " memory=memory,\n", + " verbose=False,\n", + " combine_docs_chain_kwargs={\"prompt\": PROMPT},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Ask the first question (and verify how many text chunks have been used)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "question = \"What about Mexico and Guatemala?\"\n", + "\n", + "result = qa_chain.invoke({\"question\": question})\n", + "print(\"Answer from LLM:\")\n", + "print(\"================\")\n", + "print(result[\"answer\"])\n", + "\n", + "source_docs = result[\"source_documents\"]\n", + "print(\"================\")\n", + "print(f\"Number of used source document chunks: {len(source_docs)}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Examine the used chunks of the chain in detail. Check if the best ranked chunk contains info about \"Mexico and Guatemala\" as mentioned in the question." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for doc in source_docs:\n", + " print(\"-\" * 80)\n", + " print(doc.page_content)\n", + " print(doc.metadata)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Ask another question on the same conversational chain. The answer should relate to the previous answer given." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "question = \"What about other countries?\"\n", + "\n", + "result = qa_chain.invoke({\"question\": question})\n", + "print(\"Answer from LLM:\")\n", + "print(\"================\")\n", + "print(result[\"answer\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Standard tables vs. 
\"custom\" tables with vector data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As default behaviour, the table for the embeddings is created with 3 columns\n", + "* A column \"VEC_TEXT\", which contains the text of the Document\n", + "* A column \"VEC_METADATA\", which contains the metadata of the Document\n", + "* A column \"VEC_VECTOR\", which contains the embeddings-vector of the document's text" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Access the vector DB with a new table\n", + "db = HanaDB(\n", + " connection=connection, embedding=embeddings, table_name=\"LANGCHAIN_DEMO_NEW_TABLE\"\n", + ")\n", + "\n", + "# Delete already existing entries from the table\n", + "db.delete(filter={})\n", + "\n", + "# Add a simple document with some metadata\n", + "docs = [\n", + " Document(\n", + " page_content=\"A simple document\",\n", + " metadata={\"start\": 100, \"end\": 150, \"doc_name\": \"simple.txt\"},\n", + " )\n", + "]\n", + "db.add_documents(docs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Show the columns in table \"LANGCHAIN_DEMO_NEW_TABLE\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cur = connection.cursor()\n", + "cur.execute(\n", + " \"SELECT COLUMN_NAME, DATA_TYPE_NAME FROM SYS.TABLE_COLUMNS WHERE SCHEMA_NAME = CURRENT_SCHEMA AND TABLE_NAME = 'LANGCHAIN_DEMO_NEW_TABLE'\"\n", + ")\n", + "rows = cur.fetchall()\n", + "for row in rows:\n", + " print(row)\n", + "cur.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Show the value of the inserted document in the three columns " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cur = connection.cursor()\n", + "cur.execute(\n", + " \"SELECT VEC_TEXT, VEC_META, TO_NVARCHAR(VEC_VECTOR) FROM LANGCHAIN_DEMO_NEW_TABLE LIMIT 1\"\n", + ")\n", + "rows = cur.fetchall()\n", + "print(rows[0][0]) # The text\n", + "print(rows[0][1]) # The metadata\n", + "print(rows[0][2]) # The vector\n", + "cur.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Custom tables must have at least three columns that match the semantics of a standard table\n", + "* A column with type \"NCLOB\" or \"NVARCHAR\" for the text/context of the embeddings\n", + "* A column with type \"NCLOB\" or \"NVARCHAR\" for the metadata \n", + "* A column with type REAL_VECTOR for the embedding vector\n", + "\n", + "The table can contain additional columns. When new Documents are inserted to the table, these addtional columns must allow NULL values." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create a new table \"MY_OWN_TABLE\" with three \"standard\" columns and one additional column\n", + "my_own_table_name = \"MY_OWN_TABLE\"\n", + "cur = connection.cursor()\n", + "cur.execute(\n", + " (\n", + " f\"CREATE TABLE {my_own_table_name} (\"\n", + " \"SOME_OTHER_COLUMN NVARCHAR(42), \"\n", + " \"MY_TEXT NVARCHAR(2048), \"\n", + " \"MY_METADATA NVARCHAR(1024), \"\n", + " \"MY_VECTOR REAL_VECTOR )\"\n", + " )\n", + ")\n", + "\n", + "# Create a HanaDB instance with the own table\n", + "db = HanaDB(\n", + " connection=connection,\n", + " embedding=embeddings,\n", + " table_name=my_own_table_name,\n", + " content_column=\"MY_TEXT\",\n", + " metadata_column=\"MY_METADATA\",\n", + " vector_column=\"MY_VECTOR\",\n", + ")\n", + "\n", + "# Add a simple document with some metadata\n", + "docs = [\n", + " Document(\n", + " page_content=\"Some other text\",\n", + " metadata={\"start\": 400, \"end\": 450, \"doc_name\": \"other.txt\"},\n", + " )\n", + "]\n", + "db.add_documents(docs)\n", + "\n", + "# Check if data has been inserted into our own table\n", + "cur.execute(f\"SELECT * FROM {my_own_table_name} LIMIT 1\")\n", + "rows = cur.fetchall()\n", + "print(rows[0][0]) # Value of column \"SOME_OTHER_DATA\". Should be NULL/None\n", + "print(rows[0][1]) # The text\n", + "print(rows[0][2]) # The metadata\n", + "print(rows[0][3]) # The vector\n", + "\n", + "cur.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Add another document and perform a similarity search on the custom table" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "docs = [\n", + " Document(\n", + " page_content=\"Some more text\",\n", + " metadata={\"start\": 800, \"end\": 950, \"doc_name\": \"more.txt\"},\n", + " )\n", + "]\n", + "db.add_documents(docs)\n", + "\n", + "query = \"What's up?\"\n", + "docs = db.similarity_search(query, k=2)\n", + "for doc in docs:\n", + " print(\"-\" * 80)\n", + " print(doc.page_content)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/docs/modules/data_connection/indexing.ipynb b/docs/docs/modules/data_connection/indexing.ipynb index 6298847217178..fe0a9a0a2638b 100644 --- a/docs/docs/modules/data_connection/indexing.ipynb +++ b/docs/docs/modules/data_connection/indexing.ipynb @@ -60,7 +60,7 @@ " * document addition by id (`add_documents` method with `ids` argument)\n", " * delete by id (`delete` method with `ids` argument)\n", "\n", - "Compatible Vectorstores: `AnalyticDB`, `AstraDB`, `AwaDB`, `Bagel`, `Cassandra`, `Chroma`, `DashVector`, `DatabricksVectorSearch`, `DeepLake`, `Dingo`, `ElasticVectorSearch`, `ElasticsearchStore`, `FAISS`, `MyScale`, `PGVector`, `Pinecone`, `Qdrant`, `Redis`, `ScaNN`, `SupabaseVectorStore`, `SurrealDBStore`, `TimescaleVector`, `Vald`, `Vearch`, `VespaStore`, `Weaviate`, `ZepVectorStore`.\n", + "Compatible Vectorstores: `AnalyticDB`, `AstraDB`, `AwaDB`, `Bagel`, `Cassandra`, `Chroma`, `DashVector`, `DatabricksVectorSearch`, `DeepLake`, `Dingo`, 
`ElasticVectorSearch`, `ElasticsearchStore`, `FAISS`, `HanaDB`, `MyScale`, `PGVector`, `Pinecone`, `Qdrant`, `Redis`, `ScaNN`, `SupabaseVectorStore`, `SurrealDBStore`, `TimescaleVector`, `Vald`, `Vearch`, `VespaStore`, `Weaviate`, `ZepVectorStore`.\n", " \n", "## Caution\n", "\n", diff --git a/libs/community/langchain_community/vectorstores/__init__.py b/libs/community/langchain_community/vectorstores/__init__.py index a5fe62dd99dd3..949770bca89cf 100644 --- a/libs/community/langchain_community/vectorstores/__init__.py +++ b/libs/community/langchain_community/vectorstores/__init__.py @@ -204,6 +204,12 @@ def _import_faiss() -> Any: return FAISS +def _import_hanavector() -> Any: + from langchain_community.vectorstores.hanavector import HanaDB + + return HanaDB + + def _import_hologres() -> Any: from langchain_community.vectorstores.hologres import Hologres @@ -527,6 +533,8 @@ def __getattr__(name: str) -> Any: return _import_epsilla() elif name == "FAISS": return _import_faiss() + elif name == "HanaDB": + return _import_hanavector() elif name == "Hologres": return _import_hologres() elif name == "KDBAI": @@ -645,6 +653,7 @@ def __getattr__(name: str) -> Any: "ElasticsearchStore", "Epsilla", "FAISS", + "HanaDB", "Hologres", "KDBAI", "LanceDB", diff --git a/libs/community/langchain_community/vectorstores/hanavector.py b/libs/community/langchain_community/vectorstores/hanavector.py new file mode 100644 index 0000000000000..04eec65a44868 --- /dev/null +++ b/libs/community/langchain_community/vectorstores/hanavector.py @@ -0,0 +1,575 @@ +"""SAP HANA Cloud Vector Engine""" +from __future__ import annotations + +import importlib.util +import json +import re +from typing import ( + TYPE_CHECKING, + Callable, + Iterable, + List, + Optional, + Tuple, + Type, +) + +import numpy as np +from langchain_core.documents import Document +from langchain_core.embeddings import Embeddings +from langchain_core.runnables.config import run_in_executor +from langchain_core.vectorstores import VectorStore + +from langchain_community.vectorstores.utils import ( + DistanceStrategy, + maximal_marginal_relevance, +) + +if TYPE_CHECKING: + from hdbcli import dbapi + +HANA_DISTANCE_FUNCTION: dict = { + DistanceStrategy.COSINE: ("COSINE_SIMILARITY", "DESC"), + DistanceStrategy.EUCLIDEAN_DISTANCE: ("L2DISTANCE", "ASC"), +} + +default_distance_strategy = DistanceStrategy.COSINE +default_table_name: str = "EMBEDDINGS" +default_content_column: str = "VEC_TEXT" +default_metadata_column: str = "VEC_META" +default_vector_column: str = "VEC_VECTOR" +default_vector_column_length: int = -1 # -1 means dynamic length + + +class HanaDB(VectorStore): + """SAP HANA Cloud Vector Engine + + The prerequisite for using this class is the installation of the ``hdbcli`` + Python package. + + The HanaDB vectorstore can be created by providing an embedding function and + an existing database connection. Optionally, the names of the table and the + columns to use. + """ + + def __init__( + self, + connection: dbapi.Connection, + embedding: Embeddings, + distance_strategy: DistanceStrategy = default_distance_strategy, + table_name: str = default_table_name, + content_column: str = default_content_column, + metadata_column: str = default_metadata_column, + vector_column: str = default_vector_column, + vector_column_length: int = default_vector_column_length, + ): + # Check if the hdbcli package is installed + if importlib.util.find_spec("hdbcli") is None: + raise ImportError( + "Could not import hdbcli python package. 
" + "Please install it with `pip install hdbcli`." + ) + + valid_distance = False + for key in HANA_DISTANCE_FUNCTION.keys(): + if key is distance_strategy: + valid_distance = True + if not valid_distance: + raise ValueError( + "Unsupported distance_strategy: {}".format(distance_strategy) + ) + + self.connection = connection + self.embedding = embedding + self.distance_strategy = distance_strategy + self.table_name = HanaDB._sanitize_name(table_name) + self.content_column = HanaDB._sanitize_name(content_column) + self.metadata_column = HanaDB._sanitize_name(metadata_column) + self.vector_column = HanaDB._sanitize_name(vector_column) + self.vector_column_length = HanaDB._sanitize_int(vector_column_length) + + # Check if the table exists, and eventually create it + if not self._table_exists(self.table_name): + sql_str = ( + f"CREATE TABLE {self.table_name}(" + f"{self.content_column} NCLOB, " + f"{self.metadata_column} NCLOB, " + f"{self.vector_column} REAL_VECTOR " + ) + if self.vector_column_length == -1: + sql_str += ");" + else: + sql_str += f"({self.vector_column_length}));" + + try: + cur = self.connection.cursor() + cur.execute(sql_str) + finally: + cur.close() + + # Check if the needed columns exist and have the correct type + self._check_column(self.table_name, self.content_column, ["NCLOB", "NVARCHAR"]) + self._check_column(self.table_name, self.metadata_column, ["NCLOB", "NVARCHAR"]) + self._check_column( + self.table_name, + self.vector_column, + ["REAL_VECTOR"], + self.vector_column_length, + ) + + def _table_exists(self, table_name) -> bool: + sql_str = ( + "SELECT COUNT(*) FROM SYS.TABLES WHERE SCHEMA_NAME = CURRENT_SCHEMA" + " AND TABLE_NAME = ?" + ) + try: + cur = self.connection.cursor() + cur.execute(sql_str, (table_name)) + if cur.has_result_set(): + rows = cur.fetchall() + if rows[0][0] == 1: + return True + finally: + cur.close() + return False + + def _check_column(self, table_name, column_name, column_type, column_length=None): + sql_str = ( + "SELECT DATA_TYPE_NAME, LENGTH FROM SYS.TABLE_COLUMNS WHERE " + "SCHEMA_NAME = CURRENT_SCHEMA " + "AND TABLE_NAME = ? AND COLUMN_NAME = ?" 
+ ) + try: + cur = self.connection.cursor() + cur.execute(sql_str, (table_name, column_name)) + if cur.has_result_set(): + rows = cur.fetchall() + if len(rows) == 0: + raise AttributeError(f"Column {column_name} does not exist") + # Check data type + if rows[0][0] not in column_type: + raise AttributeError( + f"Column {column_name} has the wrong type: {rows[0][0]}" + ) + # Check length, if parameter was provided + if column_length is not None: + if rows[0][1] != column_length: + raise AttributeError( + f"Column {column_name} has the wrong length: {rows[0][1]}" + ) + else: + raise AttributeError(f"Column {column_name} does not exist") + finally: + cur.close() + + @property + def embeddings(self) -> Embeddings: + return self.embedding + + def _sanitize_name(input_str: str) -> str: + # Remove characters that are not alphanumeric or underscores + return re.sub(r"[^a-zA-Z0-9_]", "", input_str) + + def _sanitize_int(input_int: any) -> int: + value = int(str(input_int)) + if value < -1: + raise ValueError(f"Value ({value}) must not be smaller than -1") + return int(str(input_int)) + + def _sanitize_list_float(embedding: List[float]) -> List[float]: + for value in embedding: + if not isinstance(value, float): + raise ValueError(f"Value ({value}) does not have type float") + return embedding + + # Compile pattern only once, for better performance + _compiled_pattern = re.compile("^[_a-zA-Z][_a-zA-Z0-9]*$") + + def _sanitize_metadata_keys(metadata: dict) -> dict: + for key in metadata.keys(): + if not HanaDB._compiled_pattern.match(key): + raise ValueError(f"Invalid metadata key {key}") + + return metadata + + def add_texts( + self, + texts: Iterable[str], + metadatas: Optional[List[dict]] = None, + embeddings: Optional[List[List[float]]] = None, + ) -> List[str]: + """Add more texts to the vectorstore. + + Args: + texts (Iterable[str]): Iterable of strings/text to add to the vectorstore. + metadatas (Optional[List[dict]], optional): Optional list of metadatas. + Defaults to None. + embeddings (Optional[List[List[float]]], optional): Optional pre-generated + embeddings. Defaults to None. + + Returns: + List[str]: empty list + """ + # Create all embeddings of the texts beforehand to improve performance + if embeddings is None: + embeddings = self.embedding.embed_documents(list(texts)) + + cur = self.connection.cursor() + try: + # Insert data into the table + for i, text in enumerate(texts): + # Use provided values by default or fallback + metadata = metadatas[i] if metadatas else {} + embedding = ( + embeddings[i] + if embeddings + else self.embedding.embed_documents([text])[0] + ) + sql_str = ( + f"INSERT INTO {self.table_name} ({self.content_column}, " + f"{self.metadata_column}, {self.vector_column}) " + f"VALUES (?, ?, TO_REAL_VECTOR (?));" + ) + cur.execute( + sql_str, + ( + text, + json.dumps(HanaDB._sanitize_metadata_keys(metadata)), + f"[{','.join(map(str, embedding))}]", + ), + ) + finally: + cur.close() + return [] + + @classmethod + def from_texts( + cls: Type[HanaDB], + texts: List[str], + embedding: Embeddings, + metadatas: Optional[List[dict]] = None, + connection: dbapi.Connection = None, + distance_strategy: DistanceStrategy = default_distance_strategy, + table_name: str = default_table_name, + content_column: str = default_content_column, + metadata_column: str = default_metadata_column, + vector_column: str = default_vector_column, + vector_column_length: int = default_vector_column_length, + ): + """Create a HanaDB instance from raw documents. 
+ This is a user-friendly interface that: + 1. Embeds documents. + 2. Creates a table if it does not yet exist. + 3. Adds the documents to the table. + This is intended to be a quick way to get started. + """ + + instance = cls( + connection=connection, + embedding=embedding, + distance_strategy=distance_strategy, + table_name=table_name, + content_column=content_column, + metadata_column=metadata_column, + vector_column=vector_column, + vector_column_length=vector_column_length, # -1 means dynamic length + ) + instance.add_texts(texts, metadatas) + return instance + + def similarity_search( + self, query: str, k: int = 4, filter: Optional[dict] = None + ) -> List[Document]: + """Return docs most similar to query. + + Args: + query: Text to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + filter: A dictionary of metadata fields and values to filter by. + Defaults to None. + + Returns: + List of Documents most similar to the query + """ + docs_and_scores = self.similarity_search_with_score( + query=query, k=k, filter=filter + ) + return [doc for doc, _ in docs_and_scores] + + def similarity_search_with_score( + self, query: str, k: int = 4, filter: Optional[dict] = None + ) -> List[Tuple[Document, float]]: + """Return documents and score values most similar to query. + + Args: + query: Text to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + filter: A dictionary of metadata fields and values to filter by. + Defaults to None. + + Returns: + List of tuples (containing a Document and a score) that are + most similar to the query + """ + embedding = self.embedding.embed_query(query) + return self.similarity_search_with_score_by_vector( + embedding=embedding, k=k, filter=filter + ) + + def similarity_search_with_score_and_vector_by_vector( + self, embedding: List[float], k: int = 4, filter: Optional[dict] = None + ) -> List[Tuple[Document, float, List[float]]]: + """Return docs most similar to the given embedding. + + Args: + query: Text to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + filter: A dictionary of metadata fields and values to filter by. + Defaults to None. 
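+                Example (hypothetical keys and values; str, int, and bool
+                values are supported): {"quality": "good", "ready": True}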
+ + Returns: + List of Documents most similar to the query and + score and the document's embedding vector for each + """ + result = [] + k = HanaDB._sanitize_int(k) + embedding = HanaDB._sanitize_list_float(embedding) + distance_func_name = HANA_DISTANCE_FUNCTION[self.distance_strategy][0] + embedding_as_str = ",".join(map(str, embedding)) + sql_str = ( + f"SELECT TOP {k}" + f" {self.content_column}, " # row[0] + f" {self.metadata_column}, " # row[1] + f" TO_NVARCHAR({self.vector_column}), " # row[2] + f" {distance_func_name}({self.vector_column}, TO_REAL_VECTOR " + f" (ARRAY({embedding_as_str}))) AS CS " # row[3] + f"FROM {self.table_name}" + ) + order_str = f" order by CS {HANA_DISTANCE_FUNCTION[self.distance_strategy][1]}" + where_str, query_tuple = self._create_where_by_filter(filter) + sql_str = sql_str + where_str + sql_str = sql_str + order_str + try: + cur = self.connection.cursor() + cur.execute(sql_str, query_tuple) + if cur.has_result_set(): + rows = cur.fetchall() + for row in rows: + js = json.loads(row[1]) + doc = Document(page_content=row[0], metadata=js) + result_vector = HanaDB._parse_float_array_from_string(row[2]) + result.append((doc, row[3], result_vector)) + finally: + cur.close() + return result + + def similarity_search_with_score_by_vector( + self, embedding: List[float], k: int = 4, filter: Optional[dict] = None + ) -> List[Tuple[Document, float]]: + """Return docs most similar to the given embedding. + + Args: + query: Text to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + filter: A dictionary of metadata fields and values to filter by. + Defaults to None. + + Returns: + List of Documents most similar to the query and score for each + """ + whole_result = self.similarity_search_with_score_and_vector_by_vector( + embedding=embedding, k=k, filter=filter + ) + return [(result_item[0], result_item[1]) for result_item in whole_result] + + def similarity_search_by_vector( + self, embedding: List[float], k: int = 4, filter: Optional[dict] = None + ) -> List[Document]: + """Return docs most similar to embedding vector. + + Args: + embedding: Embedding to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + filter: A dictionary of metadata fields and values to filter by. + Defaults to None. + + Returns: + List of Documents most similar to the query vector. + """ + docs_and_scores = self.similarity_search_with_score_by_vector( + embedding=embedding, k=k, filter=filter + ) + return [doc for doc, _ in docs_and_scores] + + def _create_where_by_filter(self, filter): + query_tuple = [] + where_str = "" + if filter: + for i, key in enumerate(filter.keys()): + if i == 0: + where_str += " WHERE " + else: + where_str += " AND " + + where_str += f" JSON_VALUE({self.metadata_column}, '$.{key}') = ?" + + if isinstance(filter[key], bool): + if filter[key]: + query_tuple.append("true") + else: + query_tuple.append("false") + elif isinstance(filter[key], int) or isinstance(filter[key], str): + query_tuple.append(filter[key]) + else: + raise ValueError( + f"Unsupported filter data-type: {type(filter[key])}" + ) + + return where_str, query_tuple + + def delete( + self, ids: Optional[List[str]] = None, filter: Optional[dict] = None + ) -> Optional[bool]: + """Delete entries by filter with metadata values + + Args: + ids: Deletion with ids is not supported! A ValueError will be raised. + filter: A dictionary of metadata fields and values to filter by. + An empty filter ({}) will delete all entries in the table. 
+ + Returns: + Optional[bool]: True, if deletion is technically successful. + Deletion of zero entries, due to non-matching filters is a success. + """ + + if ids is not None: + raise ValueError("Deletion via ids is not supported") + + if filter is None: + raise ValueError("Parameter 'filter' is required when calling 'delete'") + + where_str, query_tuple = self._create_where_by_filter(filter) + sql_str = f"DELETE FROM {self.table_name} {where_str}" + + try: + cur = self.connection.cursor() + cur.execute(sql_str, query_tuple) + finally: + cur.close() + + return True + + async def adelete( + self, ids: Optional[List[str]] = None, filter: Optional[dict] = None + ) -> Optional[bool]: + """Delete by vector ID or other criteria. + + Args: + ids: List of ids to delete. + + Returns: + Optional[bool]: True if deletion is successful, + False otherwise, None if not implemented. + """ + return await run_in_executor(None, self.delete, ids=ids, filter=filter) + + def max_marginal_relevance_search( + self, + query: str, + k: int = 4, + fetch_k: int = 20, + lambda_mult: float = 0.5, + filter: Optional[dict] = None, + ) -> List[Document]: + """Return docs selected using the maximal marginal relevance. + + Maximal marginal relevance optimizes for similarity to query AND diversity + among selected documents. + + Args: + query: search query text. + k: Number of Documents to return. Defaults to 4. + fetch_k: Number of Documents to fetch to pass to MMR algorithm. + lambda_mult: Number between 0 and 1 that determines the degree + of diversity among the results with 0 corresponding + to maximum diversity and 1 to minimum diversity. + Defaults to 0.5. + filter: Filter on metadata properties, e.g. + { + "str_property": "foo", + "int_property": 123 + } + Returns: + List of Documents selected by maximal marginal relevance. 
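+
+        Example (a minimal sketch; assumes ``db`` is an already initialized
+        HanaDB instance with documents added)::
+
+            docs = db.max_marginal_relevance_search("some query", k=2, fetch_k=20)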
+ """ + embedding = self.embedding.embed_query(query) + return self.max_marginal_relevance_search_by_vector( + embedding=embedding, + k=k, + fetch_k=fetch_k, + lambda_mult=lambda_mult, + filter=filter, + ) + + def _parse_float_array_from_string(array_as_string: str) -> List[float]: + array_wo_brackets = array_as_string[1:-1] + return [float(x) for x in array_wo_brackets.split(",")] + + def max_marginal_relevance_search_by_vector( + self, + embedding: List[float], + k: int = 4, + fetch_k: int = 20, + lambda_mult: float = 0.5, + filter: Optional[dict] = None, + ) -> List[Document]: + whole_result = self.similarity_search_with_score_and_vector_by_vector( + embedding=embedding, k=fetch_k, filter=filter + ) + embeddings = [result_item[2] for result_item in whole_result] + mmr_doc_indexes = maximal_marginal_relevance( + np.array(embedding), embeddings, lambda_mult=lambda_mult, k=k + ) + + return [whole_result[i][0] for i in mmr_doc_indexes] + + async def amax_marginal_relevance_search_by_vector( + self, + embedding: List[float], + k: int = 4, + fetch_k: int = 20, + lambda_mult: float = 0.5, + ) -> List[Document]: + """Return docs selected using the maximal marginal relevance.""" + return await run_in_executor( + None, + self.max_marginal_relevance_search_by_vector, + embedding=embedding, + k=k, + fetch_k=fetch_k, + lambda_mult=lambda_mult, + ) + + @staticmethod + def _cosine_relevance_score_fn(distance: float) -> float: + return distance + + def _select_relevance_score_fn(self) -> Callable[[float], float]: + """ + The 'correct' relevance function + may differ depending on a few things, including: + - the distance / similarity metric used by the VectorStore + - the scale of your embeddings (OpenAI's are unit normed. Many others are not!) + - embedding dimensionality + - etc. + + Vectorstores should define their own selection based method of relevance. 
+ """ + if self.distance_strategy == DistanceStrategy.COSINE: + return HanaDB._cosine_relevance_score_fn + elif self.distance_strategy == DistanceStrategy.EUCLIDEAN_DISTANCE: + return HanaDB._euclidean_relevance_score_fn + else: + raise ValueError( + "Unsupported distance_strategy: {}".format(self.distance_strategy) + ) diff --git a/libs/community/poetry.lock b/libs/community/poetry.lock index cd2e0d3ae5e0c..59c6e780fb055 100644 --- a/libs/community/poetry.lock +++ b/libs/community/poetry.lock @@ -1173,6 +1173,7 @@ files = [ {file = "contourpy-1.1.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:18a64814ae7bce73925131381603fff0116e2df25230dfc80d6d690aa6e20b37"}, {file = "contourpy-1.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90c81f22b4f572f8a2110b0b741bb64e5a6427e0a198b2cdc1fbaf85f352a3aa"}, {file = "contourpy-1.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:53cc3a40635abedbec7f1bde60f8c189c49e84ac180c665f2cd7c162cc454baa"}, + {file = "contourpy-1.1.0-cp310-cp310-win32.whl", hash = "sha256:9b2dd2ca3ac561aceef4c7c13ba654aaa404cf885b187427760d7f7d4c57cff8"}, {file = "contourpy-1.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:1f795597073b09d631782e7245016a4323cf1cf0b4e06eef7ea6627e06a37ff2"}, {file = "contourpy-1.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0b7b04ed0961647691cfe5d82115dd072af7ce8846d31a5fac6c142dcce8b882"}, {file = "contourpy-1.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:27bc79200c742f9746d7dd51a734ee326a292d77e7d94c8af6e08d1e6c15d545"}, @@ -1181,6 +1182,7 @@ files = [ {file = "contourpy-1.1.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5cec36c5090e75a9ac9dbd0ff4a8cf7cecd60f1b6dc23a374c7d980a1cd710e"}, {file = "contourpy-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f0cbd657e9bde94cd0e33aa7df94fb73c1ab7799378d3b3f902eb8eb2e04a3a"}, {file = "contourpy-1.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:181cbace49874f4358e2929aaf7ba84006acb76694102e88dd15af861996c16e"}, + {file = "contourpy-1.1.0-cp311-cp311-win32.whl", hash = "sha256:edb989d31065b1acef3828a3688f88b2abb799a7db891c9e282df5ec7e46221b"}, {file = "contourpy-1.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:fb3b7d9e6243bfa1efb93ccfe64ec610d85cfe5aec2c25f97fbbd2e58b531256"}, {file = "contourpy-1.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bcb41692aa09aeb19c7c213411854402f29f6613845ad2453d30bf421fe68fed"}, {file = "contourpy-1.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5d123a5bc63cd34c27ff9c7ac1cd978909e9c71da12e05be0231c608048bb2ae"}, @@ -1189,6 +1191,7 @@ files = [ {file = "contourpy-1.1.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:317267d915490d1e84577924bd61ba71bf8681a30e0d6c545f577363157e5e94"}, {file = "contourpy-1.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d551f3a442655f3dcc1285723f9acd646ca5858834efeab4598d706206b09c9f"}, {file = "contourpy-1.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:e7a117ce7df5a938fe035cad481b0189049e8d92433b4b33aa7fc609344aafa1"}, + {file = "contourpy-1.1.0-cp38-cp38-win32.whl", hash = "sha256:108dfb5b3e731046a96c60bdc46a1a0ebee0760418951abecbe0fc07b5b93b27"}, {file = "contourpy-1.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:d4f26b25b4f86087e7d75e63212756c38546e70f2a92d2be44f80114826e1cd4"}, {file = "contourpy-1.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:bc00bb4225d57bff7ebb634646c0ee2a1298402ec10a5fe7af79df9a51c1bfd9"}, {file = "contourpy-1.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:189ceb1525eb0655ab8487a9a9c41f42a73ba52d6789754788d1883fb06b2d8a"}, @@ -1197,6 +1200,7 @@ files = [ {file = "contourpy-1.1.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:143dde50520a9f90e4a2703f367cf8ec96a73042b72e68fcd184e1279962eb6f"}, {file = "contourpy-1.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e94bef2580e25b5fdb183bf98a2faa2adc5b638736b2c0a4da98691da641316a"}, {file = "contourpy-1.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ed614aea8462735e7d70141374bd7650afd1c3f3cb0c2dbbcbe44e14331bf002"}, + {file = "contourpy-1.1.0-cp39-cp39-win32.whl", hash = "sha256:71551f9520f008b2950bef5f16b0e3587506ef4f23c734b71ffb7b89f8721999"}, {file = "contourpy-1.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:438ba416d02f82b692e371858143970ed2eb6337d9cdbbede0d8ad9f3d7dd17d"}, {file = "contourpy-1.1.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a698c6a7a432789e587168573a864a7ea374c6be8d4f31f9d87c001d5a843493"}, {file = "contourpy-1.1.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:397b0ac8a12880412da3551a8cb5a187d3298a72802b45a3bd1805e204ad8439"}, @@ -2957,6 +2961,29 @@ files = [ {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, ] +[[package]] +name = "hdbcli" +version = "2.19.21" +description = "SAP HANA Python Client" +optional = true +python-versions = "*" +files = [ + {file = "hdbcli-2.19.21-cp27-cp27m-macosx_10_7_x86_64.whl", hash = "sha256:3028f04b86de2d9834a69f3fec2abb58201be3f1cbc357a63af18d4becaab1d3"}, + {file = "hdbcli-2.19.21-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:f5e5ad76e77eff67ffad4f7db4a9cbe3e6b9c0399e39bd31ffeb4136d2192bc0"}, + {file = "hdbcli-2.19.21-cp27-cp27m-manylinux2014_ppc64le.whl", hash = "sha256:a8ceca28c6b80c5e6f8fc80a3517d7e843b9c3288f8b03c49316be68468d3848"}, + {file = "hdbcli-2.19.21-cp27-cp27m-win_amd64.whl", hash = "sha256:c963a8fa2f3405024051812048479bdd527d730351473f354d85e7fd933bf7ce"}, + {file = "hdbcli-2.19.21-cp27-cp27mu-macosx_10_7_x86_64.whl", hash = "sha256:98e72291fd5c226b22636274c3ccadb93ff2e3b54b98bff3f37e402ecfd73151"}, + {file = "hdbcli-2.19.21-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:9773cc00cfd72ac7c2ad102560ca747bd5077437bed8bbb812071fa0ceb195a2"}, + {file = "hdbcli-2.19.21-cp27-cp27mu-manylinux2014_ppc64le.whl", hash = "sha256:ba5cf42ea026a1b1677c2c8bdbf2e6b77fbbabb7506671485740e675a6a5345a"}, + {file = "hdbcli-2.19.21-cp34-abi3-macosx_10_11_x86_64.whl", hash = "sha256:fac185d39a7a143a3c505c3e4260d0fc1b244589d4bea126e248e70e9e994e2b"}, + {file = "hdbcli-2.19.21-cp34-abi3-manylinux1_x86_64.whl", hash = "sha256:3c20763ba687acab151680c296c9daddbbbb7107a9790cf953da9bc527e373b9"}, + {file = "hdbcli-2.19.21-cp34-abi3-manylinux2014_ppc64le.whl", hash = "sha256:e20a3f60039875d03165c5790993952f5e2ec8efe141e051f7e154d96afc79a4"}, + {file = "hdbcli-2.19.21-cp36-abi3-manylinux2014_aarch64.whl", hash = "sha256:7c7c50e89fe03be434460d407f2b74196eadde21db4046d52175a22b879ffa28"}, + {file = "hdbcli-2.19.21-cp36-abi3-win32.whl", hash = "sha256:d8529099b535b2c02ddb923ef8006132cf548e358f0bb0afdef3d4d81adc74d0"}, + {file = "hdbcli-2.19.21-cp36-abi3-win_amd64.whl", hash = "sha256:7c631a467f15cbb0d91655c2059b3c421e2fa0451ffeb500a3461aa4456e3fa2"}, + {file = "hdbcli-2.19.21-cp38-abi3-macosx_11_0_arm64.whl", hash = 
"sha256:f8607479efef3dea5fc4181806a20ffe6552ef0212efc371c93a15bf2d50c3b4"}, +] + [[package]] name = "hologres-vector" version = "0.0.6" @@ -3917,7 +3944,7 @@ files = [ [[package]] name = "langchain-core" -version = "0.1.14" +version = "0.1.15" description = "Building applications with LLMs through composability" optional = false python-versions = ">=3.8.1,<4.0" @@ -4164,6 +4191,16 @@ files = [ {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, @@ -6723,6 +6760,7 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file 
= "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -6730,8 +6768,15 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -6748,6 +6793,7 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = 
"sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -6755,6 +6801,7 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -7726,7 +7773,9 @@ python-versions = ">=3.7" files = [ {file = "SQLAlchemy-2.0.23-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:638c2c0b6b4661a4fd264f6fb804eccd392745c5887f9317feb64bb7cb03b3ea"}, {file = "SQLAlchemy-2.0.23-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e3b5036aa326dc2df50cba3c958e29b291a80f604b1afa4c8ce73e78e1c9f01d"}, + {file = "SQLAlchemy-2.0.23-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:787af80107fb691934a01889ca8f82a44adedbf5ef3d6ad7d0f0b9ac557e0c34"}, {file = "SQLAlchemy-2.0.23-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c14eba45983d2f48f7546bb32b47937ee2cafae353646295f0e99f35b14286ab"}, + {file = "SQLAlchemy-2.0.23-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0666031df46b9badba9bed00092a1ffa3aa063a5e68fa244acd9f08070e936d3"}, {file = "SQLAlchemy-2.0.23-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:89a01238fcb9a8af118eaad3ffcc5dedaacbd429dc6fdc43fe430d3a941ff965"}, {file = "SQLAlchemy-2.0.23-cp310-cp310-win32.whl", hash = "sha256:cabafc7837b6cec61c0e1e5c6d14ef250b675fa9c3060ed8a7e38653bd732ff8"}, {file = "SQLAlchemy-2.0.23-cp310-cp310-win_amd64.whl", hash = "sha256:87a3d6b53c39cd173990de2f5f4b83431d534a74f0e2f88bd16eabb5667e65c6"}, @@ -7763,7 +7812,9 @@ files = [ {file = "SQLAlchemy-2.0.23-cp38-cp38-win_amd64.whl", hash = "sha256:964971b52daab357d2c0875825e36584d58f536e920f2968df8d581054eada4b"}, {file = "SQLAlchemy-2.0.23-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:616fe7bcff0a05098f64b4478b78ec2dfa03225c23734d83d6c169eb41a93e55"}, {file = "SQLAlchemy-2.0.23-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0e680527245895aba86afbd5bef6c316831c02aa988d1aad83c47ffe92655e74"}, + {file = "SQLAlchemy-2.0.23-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9585b646ffb048c0250acc7dad92536591ffe35dba624bb8fd9b471e25212a35"}, {file = "SQLAlchemy-2.0.23-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4895a63e2c271ffc7a81ea424b94060f7b3b03b4ea0cd58ab5bb676ed02f4221"}, + {file = "SQLAlchemy-2.0.23-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:cc1d21576f958c42d9aec68eba5c1a7d715e5fc07825a629015fe8e3b0657fb0"}, {file = "SQLAlchemy-2.0.23-cp39-cp39-musllinux_1_1_x86_64.whl", hash = 
"sha256:967c0b71156f793e6662dd839da54f884631755275ed71f1539c95bbada9aaab"}, {file = "SQLAlchemy-2.0.23-cp39-cp39-win32.whl", hash = "sha256:0a8c6aa506893e25a04233bc721c6b6cf844bafd7250535abb56cb6cc1368884"}, {file = "SQLAlchemy-2.0.23-cp39-cp39-win_amd64.whl", hash = "sha256:f3420d00d2cb42432c1d0e44540ae83185ccbbc67a6054dcc8ab5387add6620b"}, @@ -9175,9 +9226,9 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [extras] cli = ["typer"] -extended-testing = ["aiosqlite", "aleph-alpha-client", "anthropic", "arxiv", "assemblyai", "atlassian-python-api", "azure-ai-documentintelligence", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "cohere", "dashvector", "databricks-vectorsearch", "datasets", "dgml-utils", "elasticsearch", "esprima", "faiss-cpu", "feedparser", "fireworks-ai", "geopandas", "gitpython", "google-cloud-documentai", "gql", "gradientai", "hologres-vector", "html2text", "javelin-sdk", "jinja2", "jq", "jsonschema", "lxml", "markdownify", "motor", "msal", "mwparserfromhell", "mwxml", "newspaper3k", "numexpr", "openai", "openapi-pydantic", "oracle-ads", "pandas", "pdfminer-six", "pgvector", "praw", "psychicapi", "py-trello", "pymupdf", "pypdf", "pypdfium2", "pyspark", "rank-bm25", "rapidfuzz", "rapidocr-onnxruntime", "requests-toolbelt", "rspace_client", "scikit-learn", "sqlite-vss", "streamlit", "sympy", "telethon", "timescale-vector", "tqdm", "upstash-redis", "xata", "xmltodict", "zhipuai"] +extended-testing = ["aiosqlite", "aleph-alpha-client", "anthropic", "arxiv", "assemblyai", "atlassian-python-api", "azure-ai-documentintelligence", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "cohere", "dashvector", "databricks-vectorsearch", "datasets", "dgml-utils", "elasticsearch", "esprima", "faiss-cpu", "feedparser", "fireworks-ai", "geopandas", "gitpython", "google-cloud-documentai", "gql", "gradientai", "hdbcli", "hologres-vector", "html2text", "javelin-sdk", "jinja2", "jq", "jsonschema", "lxml", "markdownify", "motor", "msal", "mwparserfromhell", "mwxml", "newspaper3k", "numexpr", "openai", "openapi-pydantic", "oracle-ads", "pandas", "pdfminer-six", "pgvector", "praw", "psychicapi", "py-trello", "pymupdf", "pypdf", "pypdfium2", "pyspark", "rank-bm25", "rapidfuzz", "rapidocr-onnxruntime", "requests-toolbelt", "rspace_client", "scikit-learn", "sqlite-vss", "streamlit", "sympy", "telethon", "timescale-vector", "tqdm", "upstash-redis", "xata", "xmltodict", "zhipuai"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "73184aec5978e0de5b99029724164fa76394beb6359b59763ca488a258b0df4d" +content-hash = "c03bd15da5fd84ec91adec43e62b06623b6ec51003530a762455f74a4ee3715f" diff --git a/libs/community/pyproject.toml b/libs/community/pyproject.toml index 9a781bc75b05c..14896a667e09d 100644 --- a/libs/community/pyproject.toml +++ b/libs/community/pyproject.toml @@ -88,6 +88,7 @@ azure-ai-documentintelligence = {version = "^1.0.0b1", optional = true} oracle-ads = {version = "^2.9.1", optional = true} zhipuai = {version = "^1.0.7", optional = true} elasticsearch = {version = "^8.12.0", optional = true} +hdbcli = {version = "^2.19.21", optional = true} [tool.poetry.group.test] optional = true @@ -251,6 +252,7 @@ extended_testing = [ "oracle-ads", "zhipuai", "elasticsearch", + "hdbcli", ] [tool.ruff] diff --git a/libs/community/tests/integration_tests/vectorstores/test_hanavector.py b/libs/community/tests/integration_tests/vectorstores/test_hanavector.py new file mode 100644 index 0000000000000..dfcdb8c7040e9 --- /dev/null +++ 
b/libs/community/tests/integration_tests/vectorstores/test_hanavector.py
@@ -0,0 +1,891 @@
+"""Test HANA vectorstore functionality."""
+import os
+import random
+from typing import List
+
+import numpy as np
+import pytest
+
+from langchain_community.vectorstores import HanaDB
+from langchain_community.vectorstores.utils import DistanceStrategy
+from tests.integration_tests.vectorstores.fake_embeddings import (
+    ConsistentFakeEmbeddings,
+)
+
+try:
+    from hdbcli import dbapi
+
+    hanadb_installed = True
+except ImportError:
+    hanadb_installed = False
+
+
+class NormalizedFakeEmbeddings(ConsistentFakeEmbeddings):
+    """Fake embeddings with normalization. For testing purposes."""
+
+    def normalize(self, vector: List[float]) -> List[float]:
+        """Normalize vector."""
+        return [float(v / np.linalg.norm(vector)) for v in vector]
+
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        return [self.normalize(v) for v in super().embed_documents(texts)]
+
+    def embed_query(self, text: str) -> List[float]:
+        return self.normalize(super().embed_query(text))
+
+
+embedding = NormalizedFakeEmbeddings()
+
+
+class ConfigData:
+    def __init__(self):
+        self.conn = None
+        self.schema_name = ""
+
+
+test_setup = ConfigData()
+
+
+def generateSchemaName(cursor):
+    cursor.execute(
+        "SELECT REPLACE(CURRENT_UTCDATE, '-', '') || '_' || BINTOHEX(SYSUUID) FROM "
+        "DUMMY;"
+    )
+    if cursor.has_result_set():
+        rows = cursor.fetchall()
+        uid = rows[0][0]
+    else:
+        uid = random.randint(1, 100000000)
+    return f"VEC_{uid}"
+
+
+def setup_module(module):
+    test_setup.conn = dbapi.connect(
+        address=os.environ.get("HANA_DB_ADDRESS"),
+        port=os.environ.get("HANA_DB_PORT"),
+        user=os.environ.get("HANA_DB_USER"),
+        password=os.environ.get("HANA_DB_PASSWORD"),
+        autocommit=True,
+        sslValidateCertificate=False,
+    )
+    try:
+        cur = test_setup.conn.cursor()
+        test_setup.schema_name = generateSchemaName(cur)
+        sql_str = f"CREATE SCHEMA {test_setup.schema_name}"
+        cur.execute(sql_str)
+        sql_str = f"SET SCHEMA {test_setup.schema_name}"
+        cur.execute(sql_str)
+    except dbapi.ProgrammingError:
+        pass
+    finally:
+        cur.close()
+
+
+def teardown_module(module):
+    try:
+        cur = test_setup.conn.cursor()
+        sql_str = f"DROP SCHEMA {test_setup.schema_name} CASCADE"
+        cur.execute(sql_str)
+    except dbapi.ProgrammingError:
+        pass
+    finally:
+        cur.close()
+
+
+@pytest.fixture
+def texts() -> List[str]:
+    return ["foo", "bar", "baz"]
+
+
+@pytest.fixture
+def metadatas() -> List[dict]:
+    return [
+        {"start": 0, "end": 100, "quality": "good", "ready": True},
+        {"start": 100, "end": 200, "quality": "bad", "ready": False},
+        {"start": 200, "end": 300, "quality": "ugly", "ready": True},
+    ]
+
+
+def drop_table(connection, table_name):
+    try:
+        cur = connection.cursor()
+        sql_str = f"DROP TABLE {table_name}"
+        cur.execute(sql_str)
+    except dbapi.ProgrammingError:
+        pass
+    finally:
+        cur.close()
+
+
+@pytest.mark.skipif(not hanadb_installed, reason="hanadb not installed")
+def test_hanavector_non_existing_table() -> None:
+    """Test end to end construction and search."""
+    table_name = "NON_EXISTING"
+    # Delete table if it exists
+    drop_table(test_setup.conn, table_name)
+
+    # Check if table is created
+    vectordb = HanaDB(
+        connection=test_setup.conn,
+        embedding=embedding,
+        distance_strategy=DistanceStrategy.COSINE,
+        table_name=table_name,
+    )
+
+    assert vectordb._table_exists(table_name)
+
+
+@pytest.mark.skipif(not hanadb_installed, reason="hanadb not installed")
+def test_hanavector_table_with_missing_columns() -> None:
+    table_name =
"EXISTING_MISSING_COLS" + try: + drop_table(test_setup.conn, table_name) + cur = test_setup.conn.cursor() + sql_str = f"CREATE TABLE {table_name}(WRONG_COL NVARCHAR(500));" + cur.execute(sql_str) + finally: + cur.close() + + # Check if table is created + exception_occured = False + try: + HanaDB( + connection=test_setup.conn, + embedding=embedding, + distance_strategy=DistanceStrategy.COSINE, + table_name=table_name, + ) + exception_occured = False + except AttributeError: + exception_occured = True + assert exception_occured + + +@pytest.mark.skipif(not hanadb_installed, reason="hanadb not installed") +def test_hanavector_table_with_nvarchar_content(texts: List[str]) -> None: + table_name = "EXISTING_NVARCHAR" + content_column = "TEST_TEXT" + metadata_column = "TEST_META" + vector_column = "TEST_VECTOR" + try: + drop_table(test_setup.conn, table_name) + cur = test_setup.conn.cursor() + sql_str = ( + f"CREATE TABLE {table_name}({content_column} NVARCHAR(2048), " + f"{metadata_column} NVARCHAR(2048), {vector_column} REAL_VECTOR);" + ) + cur.execute(sql_str) + finally: + cur.close() + + vectordb = HanaDB( + connection=test_setup.conn, + embedding=embedding, + distance_strategy=DistanceStrategy.COSINE, + table_name=table_name, + content_column=content_column, + metadata_column=metadata_column, + vector_column=vector_column, + ) + + vectordb.add_texts(texts=texts) + + # check that embeddings have been created in the table + number_of_texts = len(texts) + number_of_rows = -1 + sql_str = f"SELECT COUNT(*) FROM {table_name}" + cur = test_setup.conn.cursor() + cur.execute(sql_str) + if cur.has_result_set(): + rows = cur.fetchall() + number_of_rows = rows[0][0] + assert number_of_rows == number_of_texts + + +@pytest.mark.skipif(not hanadb_installed, reason="hanadb not installed") +def test_hanavector_table_with_wrong_typed_columns() -> None: + table_name = "EXISTING_WRONG_TYPES" + content_column = "DOC_TEXT" + metadata_column = "DOC_META" + vector_column = "DOC_VECTOR" + try: + drop_table(test_setup.conn, table_name) + cur = test_setup.conn.cursor() + sql_str = ( + f"CREATE TABLE {table_name}({content_column} INTEGER, " + f"{metadata_column} INTEGER, {vector_column} INTEGER);" + ) + cur.execute(sql_str) + finally: + cur.close() + + # Check if table is created + exception_occured = False + try: + HanaDB( + connection=test_setup.conn, + embedding=embedding, + distance_strategy=DistanceStrategy.COSINE, + table_name=table_name, + ) + exception_occured = False + except AttributeError as err: + print(err) + exception_occured = True + assert exception_occured + + +@pytest.mark.skipif(not hanadb_installed, reason="hanadb not installed") +def test_hanavector_non_existing_table_fixed_vector_length() -> None: + """Test end to end construction and search.""" + table_name = "NON_EXISTING" + vector_column = "MY_VECTOR" + vector_column_length = 42 + # Delete table if it exists + drop_table(test_setup.conn, table_name) + + # Check if table is created + vectordb = HanaDB( + connection=test_setup.conn, + embedding=embedding, + distance_strategy=DistanceStrategy.COSINE, + table_name=table_name, + vector_column=vector_column, + vector_column_length=vector_column_length, + ) + + assert vectordb._table_exists(table_name) + vectordb._check_column( + table_name, vector_column, "REAL_VECTOR", vector_column_length + ) + + +@pytest.mark.skipif(not hanadb_installed, reason="hanadb not installed") +def test_hanavector_add_texts(texts: List[str]) -> None: + table_name = "TEST_TABLE_ADD_TEXTS" + # Delete table if it exists + 
drop_table(test_setup.conn, table_name) + + # Check if table is created + vectordb = HanaDB( + connection=test_setup.conn, embedding=embedding, table_name=table_name + ) + + vectordb.add_texts(texts=texts) + + # check that embeddings have been created in the table + number_of_texts = len(texts) + number_of_rows = -1 + sql_str = f"SELECT COUNT(*) FROM {table_name}" + cur = test_setup.conn.cursor() + cur.execute(sql_str) + if cur.has_result_set(): + rows = cur.fetchall() + number_of_rows = rows[0][0] + assert number_of_rows == number_of_texts + + +@pytest.mark.skipif(not hanadb_installed, reason="hanadb not installed") +def test_hanavector_from_texts(texts: List[str]) -> None: + table_name = "TEST_TABLE_FROM_TEXTS" + # Delete table if it exists + drop_table(test_setup.conn, table_name) + + # Check if table is created + vectorDB = HanaDB.from_texts( + connection=test_setup.conn, + texts=texts, + embedding=embedding, + table_name=table_name, + ) + # test if vectorDB is instance of HanaDB + assert isinstance(vectorDB, HanaDB) + + # check that embeddings have been created in the table + number_of_texts = len(texts) + number_of_rows = -1 + sql_str = f"SELECT COUNT(*) FROM {table_name}" + cur = test_setup.conn.cursor() + cur.execute(sql_str) + if cur.has_result_set(): + rows = cur.fetchall() + number_of_rows = rows[0][0] + assert number_of_rows == number_of_texts + + +@pytest.mark.skipif(not hanadb_installed, reason="hanadb not installed") +def test_hanavector_similarity_search_simple(texts: List[str]) -> None: + table_name = "TEST_TABLE_SEARCH_SIMPLE" + # Delete table if it exists + drop_table(test_setup.conn, table_name) + + # Check if table is created + vectorDB = HanaDB.from_texts( + connection=test_setup.conn, + texts=texts, + embedding=embedding, + table_name=table_name, + ) + + assert texts[0] == vectorDB.similarity_search(texts[0], 1)[0].page_content + assert texts[1] != vectorDB.similarity_search(texts[0], 1)[0].page_content + + +@pytest.mark.skipif(not hanadb_installed, reason="hanadb not installed") +def test_hanavector_similarity_search_by_vector_simple(texts: List[str]) -> None: + table_name = "TEST_TABLE_SEARCH_SIMPLE_VECTOR" + # Delete table if it exists + drop_table(test_setup.conn, table_name) + + vectorDB = HanaDB.from_texts( + connection=test_setup.conn, + texts=texts, + embedding=embedding, + table_name=table_name, + ) + + vector = embedding.embed_query(texts[0]) + assert texts[0] == vectorDB.similarity_search_by_vector(vector, 1)[0].page_content + assert texts[1] != vectorDB.similarity_search_by_vector(vector, 1)[0].page_content + + +@pytest.mark.skipif(not hanadb_installed, reason="hanadb not installed") +def test_hanavector_similarity_search_simple_euclidean_distance( + texts: List[str], +) -> None: + table_name = "TEST_TABLE_SEARCH_EUCLIDIAN" + # Delete table if it exists + drop_table(test_setup.conn, table_name) + + # Check if table is created + vectorDB = HanaDB.from_texts( + connection=test_setup.conn, + texts=texts, + embedding=embedding, + table_name=table_name, + distance_strategy=DistanceStrategy.EUCLIDEAN_DISTANCE, + ) + + assert texts[0] == vectorDB.similarity_search(texts[0], 1)[0].page_content + assert texts[1] != vectorDB.similarity_search(texts[0], 1)[0].page_content + + +@pytest.mark.skipif(not hanadb_installed, reason="hanadb not installed") +def test_hanavector_similarity_search_with_metadata( + texts: List[str], metadatas: List[dict] +) -> None: + table_name = "TEST_TABLE_METADATA" + # Delete table if it exists + drop_table(test_setup.conn, table_name) + + 
# Check if table is created + vectorDB = HanaDB.from_texts( + connection=test_setup.conn, + texts=texts, + metadatas=metadatas, + embedding=embedding, + table_name=table_name, + ) + + search_result = vectorDB.similarity_search(texts[0], 3) + + assert texts[0] == search_result[0].page_content + assert metadatas[0]["start"] == search_result[0].metadata["start"] + assert metadatas[0]["end"] == search_result[0].metadata["end"] + assert texts[1] != search_result[0].page_content + assert metadatas[1]["start"] != search_result[0].metadata["start"] + assert metadatas[1]["end"] != search_result[0].metadata["end"] + + +@pytest.mark.skipif(not hanadb_installed, reason="hanadb not installed") +def test_hanavector_similarity_search_with_metadata_filter( + texts: List[str], metadatas: List[dict] +) -> None: + table_name = "TEST_TABLE_FILTER" + # Delete table if it exists + drop_table(test_setup.conn, table_name) + + # Check if table is created + vectorDB = HanaDB.from_texts( + connection=test_setup.conn, + texts=texts, + metadatas=metadatas, + embedding=embedding, + table_name=table_name, + ) + + search_result = vectorDB.similarity_search(texts[0], 3, filter={"start": 100}) + + assert len(search_result) == 1 + assert texts[1] == search_result[0].page_content + assert metadatas[1]["start"] == search_result[0].metadata["start"] + assert metadatas[1]["end"] == search_result[0].metadata["end"] + + search_result = vectorDB.similarity_search( + texts[0], 3, filter={"start": 100, "end": 150} + ) + assert len(search_result) == 0 + + search_result = vectorDB.similarity_search( + texts[0], 3, filter={"start": 100, "end": 200} + ) + assert len(search_result) == 1 + assert texts[1] == search_result[0].page_content + assert metadatas[1]["start"] == search_result[0].metadata["start"] + assert metadatas[1]["end"] == search_result[0].metadata["end"] + + +@pytest.mark.skipif(not hanadb_installed, reason="hanadb not installed") +def test_hanavector_similarity_search_with_metadata_filter_string( + texts: List[str], metadatas: List[dict] +) -> None: + table_name = "TEST_TABLE_FILTER_STRING" + # Delete table if it exists + drop_table(test_setup.conn, table_name) + + # Check if table is created + vectorDB = HanaDB.from_texts( + connection=test_setup.conn, + texts=texts, + metadatas=metadatas, + embedding=embedding, + table_name=table_name, + ) + + search_result = vectorDB.similarity_search(texts[0], 3, filter={"quality": "bad"}) + + assert len(search_result) == 1 + assert texts[1] == search_result[0].page_content + + +@pytest.mark.skipif(not hanadb_installed, reason="hanadb not installed") +def test_hanavector_similarity_search_with_metadata_filter_bool( + texts: List[str], metadatas: List[dict] +) -> None: + table_name = "TEST_TABLE_FILTER_BOOL" + # Delete table if it exists + drop_table(test_setup.conn, table_name) + + # Check if table is created + vectorDB = HanaDB.from_texts( + connection=test_setup.conn, + texts=texts, + metadatas=metadatas, + embedding=embedding, + table_name=table_name, + ) + + search_result = vectorDB.similarity_search(texts[0], 3, filter={"ready": False}) + + assert len(search_result) == 1 + assert texts[1] == search_result[0].page_content + + +@pytest.mark.skipif(not hanadb_installed, reason="hanadb not installed") +def test_hanavector_similarity_search_with_metadata_filter_invalid_type( + texts: List[str], metadatas: List[dict] +) -> None: + table_name = "TEST_TABLE_FILTER_INVALID_TYPE" + # Delete table if it exists + drop_table(test_setup.conn, table_name) + + # Check if table is created + 
vectorDB = HanaDB.from_texts( + connection=test_setup.conn, + texts=texts, + metadatas=metadatas, + embedding=embedding, + table_name=table_name, + ) + + exception_occured = False + try: + vectorDB.similarity_search(texts[0], 3, filter={"wrong_type": 0.1}) + except ValueError: + exception_occured = True + assert exception_occured + + +@pytest.mark.skipif(not hanadb_installed, reason="hanadb not installed") +def test_hanavector_similarity_search_with_score( + texts: List[str], metadatas: List[dict] +) -> None: + table_name = "TEST_TABLE_SCORE" + # Delete table if it exists + drop_table(test_setup.conn, table_name) + + # Check if table is created + vectorDB = HanaDB.from_texts( + connection=test_setup.conn, + texts=texts, + embedding=embedding, + table_name=table_name, + ) + + search_result = vectorDB.similarity_search_with_score(texts[0], 3) + + assert search_result[0][0].page_content == texts[0] + assert search_result[0][1] == 1.0 + assert search_result[1][1] <= search_result[0][1] + assert search_result[2][1] <= search_result[1][1] + assert search_result[2][1] >= 0.0 + + +@pytest.mark.skipif(not hanadb_installed, reason="hanadb not installed") +def test_hanavector_similarity_search_with_relevance_score( + texts: List[str], metadatas: List[dict] +) -> None: + table_name = "TEST_TABLE_REL_SCORE" + # Delete table if it exists + drop_table(test_setup.conn, table_name) + + # Check if table is created + vectorDB = HanaDB.from_texts( + connection=test_setup.conn, + texts=texts, + embedding=embedding, + table_name=table_name, + ) + + search_result = vectorDB.similarity_search_with_relevance_scores(texts[0], 3) + + assert search_result[0][0].page_content == texts[0] + assert search_result[0][1] == 1.0 + assert search_result[1][1] <= search_result[0][1] + assert search_result[2][1] <= search_result[1][1] + assert search_result[2][1] >= 0.0 + + +@pytest.mark.skipif(not hanadb_installed, reason="hanadb not installed") +def test_hanavector_similarity_search_with_relevance_score_with_euclidian_distance( + texts: List[str], metadatas: List[dict] +) -> None: + table_name = "TEST_TABLE_REL_SCORE_EUCLIDIAN" + # Delete table if it exists + drop_table(test_setup.conn, table_name) + + # Check if table is created + vectorDB = HanaDB.from_texts( + connection=test_setup.conn, + texts=texts, + embedding=embedding, + table_name=table_name, + distance_strategy=DistanceStrategy.EUCLIDEAN_DISTANCE, + ) + + search_result = vectorDB.similarity_search_with_relevance_scores(texts[0], 3) + + assert search_result[0][0].page_content == texts[0] + assert search_result[0][1] == 1.0 + assert search_result[1][1] <= search_result[0][1] + assert search_result[2][1] <= search_result[1][1] + assert search_result[2][1] >= 0.0 + + +@pytest.mark.skipif(not hanadb_installed, reason="hanadb not installed") +def test_hanavector_similarity_search_with_score_with_euclidian_distance( + texts: List[str], metadatas: List[dict] +) -> None: + table_name = "TEST_TABLE_SCORE_DISTANCE" + # Delete table if it exists + drop_table(test_setup.conn, table_name) + + # Check if table is created + vectorDB = HanaDB.from_texts( + connection=test_setup.conn, + texts=texts, + embedding=embedding, + table_name=table_name, + distance_strategy=DistanceStrategy.EUCLIDEAN_DISTANCE, + ) + + search_result = vectorDB.similarity_search_with_score(texts[0], 3) + + assert search_result[0][0].page_content == texts[0] + assert search_result[0][1] == 0.0 + assert search_result[1][1] >= search_result[0][1] + assert search_result[2][1] >= search_result[1][1] + + 
+@pytest.mark.skipif(not hanadb_installed, reason="hanadb not installed") +def test_hanavector_delete_with_filter(texts: List[str], metadatas: List[dict]) -> None: + table_name = "TEST_TABLE_DELETE_FILTER" + # Delete table if it exists + drop_table(test_setup.conn, table_name) + + # Fill table + vectorDB = HanaDB.from_texts( + connection=test_setup.conn, + texts=texts, + metadatas=metadatas, + embedding=embedding, + table_name=table_name, + ) + + search_result = vectorDB.similarity_search(texts[0], 3) + assert len(search_result) == 3 + + # Delete one of the three entries + assert vectorDB.delete(filter={"start": 100, "end": 200}) + + search_result = vectorDB.similarity_search(texts[0], 3) + assert len(search_result) == 2 + + +@pytest.mark.skipif(not hanadb_installed, reason="hanadb not installed") +async def test_hanavector_delete_with_filter_async( + texts: List[str], metadatas: List[dict] +) -> None: + table_name = "TEST_TABLE_DELETE_FILTER_ASYNC" + # Delete table if it exists + drop_table(test_setup.conn, table_name) + + # Fill table + vectorDB = HanaDB.from_texts( + connection=test_setup.conn, + texts=texts, + metadatas=metadatas, + embedding=embedding, + table_name=table_name, + ) + + search_result = vectorDB.similarity_search(texts[0], 3) + assert len(search_result) == 3 + + # Delete one of the three entries + assert await vectorDB.adelete(filter={"start": 100, "end": 200}) + + search_result = vectorDB.similarity_search(texts[0], 3) + assert len(search_result) == 2 + + +@pytest.mark.skipif(not hanadb_installed, reason="hanadb not installed") +def test_hanavector_delete_all_with_empty_filter( + texts: List[str], metadatas: List[dict] +) -> None: + table_name = "TEST_TABLE_DELETE_ALL" + # Delete table if it exists + drop_table(test_setup.conn, table_name) + + # Fill table + vectorDB = HanaDB.from_texts( + connection=test_setup.conn, + texts=texts, + metadatas=metadatas, + embedding=embedding, + table_name=table_name, + ) + + search_result = vectorDB.similarity_search(texts[0], 3) + assert len(search_result) == 3 + + # Delete all entries + assert vectorDB.delete(filter={}) + + search_result = vectorDB.similarity_search(texts[0], 3) + assert len(search_result) == 0 + + +@pytest.mark.skipif(not hanadb_installed, reason="hanadb not installed") +def test_hanavector_delete_called_wrong( + texts: List[str], metadatas: List[dict] +) -> None: + table_name = "TEST_TABLE_DELETE_FILTER_WRONG" + # Delete table if it exists + drop_table(test_setup.conn, table_name) + + # Fill table + vectorDB = HanaDB.from_texts( + connection=test_setup.conn, + texts=texts, + metadatas=metadatas, + embedding=embedding, + table_name=table_name, + ) + + # Delete without filter parameter + exception_occured = False + try: + vectorDB.delete() + except ValueError: + exception_occured = True + assert exception_occured + + # Delete with ids parameter + exception_occured = False + try: + vectorDB.delete(ids=["id1", "id"], filter={"start": 100, "end": 200}) + except ValueError: + exception_occured = True + assert exception_occured + + +@pytest.mark.skipif(not hanadb_installed, reason="hanadb not installed") +def test_hanavector_max_marginal_relevance_search(texts: List[str]) -> None: + table_name = "TEST_TABLE_MAX_RELEVANCE" + # Delete table if it exists + drop_table(test_setup.conn, table_name) + + # Check if table is created + vectorDB = HanaDB.from_texts( + connection=test_setup.conn, + texts=texts, + embedding=embedding, + table_name=table_name, + ) + + search_result = vectorDB.max_marginal_relevance_search(texts[0], 
k=2, fetch_k=20)
+
+    assert len(search_result) == 2
+    assert search_result[0].page_content == texts[0]
+    assert search_result[1].page_content != texts[0]
+
+
+@pytest.mark.skipif(not hanadb_installed, reason="hanadb not installed")
+def test_hanavector_max_marginal_relevance_search_vector(texts: List[str]) -> None:
+    table_name = "TEST_TABLE_MAX_RELEVANCE_VECTOR"
+    # Delete table if it exists
+    drop_table(test_setup.conn, table_name)
+
+    # Check if table is created
+    vectorDB = HanaDB.from_texts(
+        connection=test_setup.conn,
+        texts=texts,
+        embedding=embedding,
+        table_name=table_name,
+    )
+
+    search_result = vectorDB.max_marginal_relevance_search_by_vector(
+        embedding.embed_query(texts[0]), k=2, fetch_k=20
+    )
+
+    assert len(search_result) == 2
+    assert search_result[0].page_content == texts[0]
+    assert search_result[1].page_content != texts[0]
+
+
+@pytest.mark.skipif(not hanadb_installed, reason="hanadb not installed")
+async def test_hanavector_max_marginal_relevance_search_async(texts: List[str]) -> None:
+    table_name = "TEST_TABLE_MAX_RELEVANCE_ASYNC"
+    # Delete table if it exists
+    drop_table(test_setup.conn, table_name)
+
+    # Check if table is created
+    vectorDB = HanaDB.from_texts(
+        connection=test_setup.conn,
+        texts=texts,
+        embedding=embedding,
+        table_name=table_name,
+    )
+
+    search_result = await vectorDB.amax_marginal_relevance_search(
+        texts[0], k=2, fetch_k=20
+    )
+
+    assert len(search_result) == 2
+    assert search_result[0].page_content == texts[0]
+    assert search_result[1].page_content != texts[0]
+
+
+@pytest.mark.skipif(not hanadb_installed, reason="hanadb not installed")
+def test_hanavector_filter_prepared_statement_params(
+    texts: List[str], metadatas: List[dict]
+) -> None:
+    table_name = "TEST_TABLE_FILTER_PARAM"
+    # Delete table if it exists
+    drop_table(test_setup.conn, table_name)
+
+    # Check if table is created
+    HanaDB.from_texts(
+        connection=test_setup.conn,
+        texts=texts,
+        metadatas=metadatas,
+        embedding=embedding,
+        table_name=table_name,
+    )
+
+    cur = test_setup.conn.cursor()
+    sql_str = (
+        f"SELECT * FROM {table_name} WHERE JSON_VALUE(VEC_META, '$.start') = '100'"
+    )
+    cur.execute(sql_str)
+    rows = cur.fetchall()
+    assert len(rows) == 1
+
+    query_value = 100
+    sql_str = f"SELECT * FROM {table_name} WHERE JSON_VALUE(VEC_META, '$.start') = ?"
+    cur.execute(sql_str, (query_value,))
+    rows = cur.fetchall()
+    assert len(rows) == 1
+
+    sql_str = (
+        f"SELECT * FROM {table_name} WHERE JSON_VALUE(VEC_META, '$.quality') = 'good'"
+    )
+    cur.execute(sql_str)
+    rows = cur.fetchall()
+    assert len(rows) == 1
+
+    query_value = "good"
+    sql_str = f"SELECT * FROM {table_name} WHERE JSON_VALUE(VEC_META, '$.quality') = ?"
+    cur.execute(sql_str, (query_value,))
+    rows = cur.fetchall()
+    assert len(rows) == 1
+
+    sql_str = (
+        f"SELECT * FROM {table_name} WHERE JSON_VALUE(VEC_META, '$.ready') = false"
+    )
+    cur.execute(sql_str)
+    rows = cur.fetchall()
+    assert len(rows) == 1
+
+    # query_value = True
+    query_value = "true"
+    sql_str = f"SELECT * FROM {table_name} WHERE JSON_VALUE(VEC_META, '$.ready') = ?"
+    cur.execute(sql_str, (query_value,))
+    rows = cur.fetchall()
+    assert len(rows) == 2
+
+    # query_value = False
+    query_value = "false"
+    sql_str = f"SELECT * FROM {table_name} WHERE JSON_VALUE(VEC_META, '$.ready') = ?"
+    cur.execute(sql_str, (query_value,))
+    rows = cur.fetchall()
+    assert len(rows) == 1
+
+
+@pytest.mark.skipif(not hanadb_installed, reason="hanadb not installed")
+def test_invalid_metadata_keys(texts: List[str], metadatas: List[dict]) -> None:
+    table_name = "TEST_TABLE_INVALID_METADATA"
+    # Delete table if it exists
+    drop_table(test_setup.conn, table_name)
+
+    invalid_metadatas = [
+        {"sta rt": 0, "end": 100, "quality": "good", "ready": True},
+    ]
+    exception_occured = False
+    try:
+        HanaDB.from_texts(
+            connection=test_setup.conn,
+            texts=texts,
+            metadatas=invalid_metadatas,
+            embedding=embedding,
+            table_name=table_name,
+        )
+    except ValueError:
+        exception_occured = True
+    assert exception_occured
+
+    invalid_metadatas = [
+        {"sta/nrt": 0, "end": 100, "quality": "good", "ready": True},
+    ]
+    exception_occured = False
+    try:
+        HanaDB.from_texts(
+            connection=test_setup.conn,
+            texts=texts,
+            metadatas=invalid_metadatas,
+            embedding=embedding,
+            table_name=table_name,
+        )
+    except ValueError:
+        exception_occured = True
+    assert exception_occured
diff --git a/libs/community/tests/unit_tests/vectorstores/test_hanavector.py b/libs/community/tests/unit_tests/vectorstores/test_hanavector.py
new file mode 100644
index 0000000000000..6eab86d33d9a8
--- /dev/null
+++ b/libs/community/tests/unit_tests/vectorstores/test_hanavector.py
@@ -0,0 +1,46 @@
+"""Test HanaVector functionality."""
+
+from langchain_community.vectorstores import HanaDB
+
+
+def test_int_sanitation_with_illegal_value() -> None:
+    """Test sanitization of int with illegal value"""
+    successful = True
+    try:
+        HanaDB._sanitize_int("HUGO")
+        successful = False
+    except ValueError:
+        pass
+
+    assert successful
+
+
+def test_int_sanitation_with_legal_values() -> None:
+    """Test sanitization of int with legal values"""
+    assert HanaDB._sanitize_int(42) == 42
+
+    assert HanaDB._sanitize_int("21") == 21
+
+
+def test_int_sanitation_with_negative_values() -> None:
+    """Test sanitization of int with negative values"""
+    assert HanaDB._sanitize_int(-1) == -1
+
+    assert HanaDB._sanitize_int("-1") == -1
+
+
+def test_int_sanitation_with_illegal_negative_value() -> None:
+    """Test sanitization of int with an illegal negative value"""
+    successful = True
+    try:
+        HanaDB._sanitize_int(-2)
+        successful = False
+    except ValueError:
+        pass
+
+    assert successful
+
+
+def test_parse_float_array_from_string() -> None:
+    array_as_string = "[0.1, 0.2, 0.3]"
+    assert HanaDB._parse_float_array_from_string(array_as_string) == [0.1, 0.2, 0.3]
diff --git a/libs/community/tests/unit_tests/vectorstores/test_public_api.py b/libs/community/tests/unit_tests/vectorstores/test_public_api.py
index b94c8ae47ece5..2b96ea5d506c3 100644
--- a/libs/community/tests/unit_tests/vectorstores/test_public_api.py
+++ b/libs/community/tests/unit_tests/vectorstores/test_public_api.py
@@ -27,6 +27,7 @@
     "ElasticsearchStore",
     "Epsilla",
     "FAISS",
+    "HanaDB",
     "Hologres",
     "KDBAI",
     "LanceDB",
diff --git a/libs/langchain/tests/unit_tests/indexes/test_indexing.py b/libs/langchain/tests/unit_tests/indexes/test_indexing.py
index fda61008dcda4..b5d4c4ab01258 100644
--- a/libs/langchain/tests/unit_tests/indexes/test_indexing.py
+++ b/libs/langchain/tests/unit_tests/indexes/test_indexing.py
@@ -1233,6 +1233,7 @@ def check_compatibility(vector_store: VectorStore) -> bool:
     "ElasticVectorSearch",
     "ElasticsearchStore",
     "FAISS",
+    "HanaDB",
     "MomentoVectorIndex",
     "MyScale",
     "PGVector",