# index_search.py

import asyncio
import time

import numpy as np
from create_vector_embeddings import (
    CLUSTER_NAME,
    DATABASE_NAME,
    INSTANCE_NAME,
    PASSWORD,
    PROJECT_ID,
    REGION,
    USER,
    vector_table_name,
)
from langchain_google_alloydb_pg import AlloyDBEngine, AlloyDBVectorStore
from langchain_google_alloydb_pg.indexes import (
    DistanceStrategy,
    HNSWIndex,
    HNSWQueryOptions,
    IVFFlatIndex,
)
from langchain_google_vertexai import VertexAIEmbeddings

# Distance metric shared by the vector store and every index built below.
DISTANCE_STRATEGY = DistanceStrategy.EUCLIDEAN

# Number of results requested from each similarity search.
k = 10

# Benchmark queries; the individual query_N names are kept alongside the list.
queries = [
    "Brooding aromas of barrel spice.",
    "Aromas include tropical fruit, broom, brimstone and dried herb.",
    "Wine from spain.",
    "Condensed and dark on the bouquet",
    "Light, fresh and silky—just what might be expected from cool-climate Pinot Noir",
]
query_1, query_2, query_3, query_4, query_5 = queries


# Embedding client handed to the vector store as its embedding service.
embedding = VertexAIEmbeddings(
    project=PROJECT_ID,
    model_name="textembedding-gecko@latest",
)


async def get_vector_store():
    """Open the embeddings table as an ``AlloyDBVectorStore``.

    Builds an ``AlloyDBEngine`` from the connection settings imported from
    ``create_vector_embeddings`` and wraps ``vector_table_name`` in a vector
    store that uses the module-level ``embedding`` service and
    ``DISTANCE_STRATEGY``, with HNSW query options of ``ef_search=256``.

    Returns:
        The vector store produced by ``AlloyDBVectorStore.create``.
    """
    connection_settings = {
        "project_id": PROJECT_ID,
        "region": REGION,
        "cluster": CLUSTER_NAME,
        "instance": INSTANCE_NAME,
        "database": DATABASE_NAME,
        "user": USER,
        "password": PASSWORD,
    }
    db_engine = await AlloyDBEngine.afrom_instance(**connection_settings)

    return await AlloyDBVectorStore.create(
        engine=db_engine,
        table_name=vector_table_name,
        embedding_service=embedding,
        distance_strategy=DISTANCE_STRATEGY,
        index_query_options=HNSWQueryOptions(ef_search=256),
    )


async def query_vector_with_timing(vector_store, query):
    """Run one similarity search and measure how long it takes.

    Args:
        vector_store: Object exposing an awaitable ``asimilarity_search``.
        query: Query text to search for.

    Returns:
        Tuple ``(docs, latency)`` where ``docs`` is whatever the search
        returned for the module-level ``k`` results and ``latency`` is the
        elapsed ``time.monotonic()`` time in seconds.
    """
    started = time.monotonic()
    results = await vector_store.asimilarity_search(query=query, k=k)
    elapsed = time.monotonic() - started
    return results, elapsed


async def hnsw_search(vector_store, knn_docs):
    """Benchmark similarity search with a temporary HNSW index.

    Creates an index named "hnsw" on the store, runs every entry of the
    module-level ``queries`` list through ``query_vector_with_timing`` and
    scores each result set against the matching entry of ``knn_docs`` with
    ``calculate_recall``. The index is dropped again before returning, even
    when the validity check or a query fails, so a failed run does not leave
    the index behind for the next one.

    Args:
        vector_store: Store exposing ``aapply_vector_index``,
            ``is_valid_index``, ``adrop_vector_index`` and
            ``asimilarity_search``.
        knn_docs: Baseline results, one document list per entry in
            ``queries`` and in the same order.

    Returns:
        Tuple ``(average_latency, average_recall)`` over all queries, with
        latency in seconds.

    Raises:
        RuntimeError: If the store reports the new index as not valid.
    """
    # m and ef_construction are set explicitly here; omit both to fall back
    # to the library's defaults.
    hnsw_index = HNSWIndex(
        name="hnsw", distance_strategy=DISTANCE_STRATEGY, m=36, ef_construction=96
    )
    await vector_store.aapply_vector_index(hnsw_index)

    latencies = []
    recalls = []
    try:
        # Explicit check instead of `assert`, which is removed under `python -O`.
        if not await vector_store.is_valid_index(hnsw_index.name):
            raise RuntimeError(f"Index {hnsw_index.name!r} is not valid.")
        print("HNSW index created.")

        for i, query in enumerate(queries):
            hnsw_docs, latency = await query_vector_with_timing(vector_store, query)
            latencies.append(latency)
            recalls.append(calculate_recall(knn_docs[i], hnsw_docs))
        print(recalls)
    finally:
        # Always clean up so later runs start without a leftover index.
        await vector_store.adrop_vector_index(hnsw_index.name)

    # calculate average recall & latency
    average_latency = sum(latencies) / len(latencies)
    average_recall = sum(recalls) / len(recalls)
    return average_latency, average_recall


async def ivfflat_search(vector_store, knn_docs):
    """Benchmark similarity search with a temporary IVFFlat index.

    Creates an index named "ivfflat" on the store (only the name and distance
    strategy are passed; every other setting is left to the library), runs
    every entry of the module-level ``queries`` list through
    ``query_vector_with_timing`` and scores each result set against the
    matching entry of ``knn_docs`` with ``calculate_recall``. The index is
    dropped again before returning, even when the validity check or a query
    fails, so a failed run does not leave the index behind for the next one.

    Args:
        vector_store: Store exposing ``aapply_vector_index``,
            ``is_valid_index``, ``adrop_vector_index`` and
            ``asimilarity_search``.
        knn_docs: Baseline results, one document list per entry in
            ``queries`` and in the same order.

    Returns:
        Tuple ``(average_latency, average_recall)`` over all queries, with
        latency in seconds.

    Raises:
        RuntimeError: If the store reports the new index as not valid.
    """
    ivfflat_index = IVFFlatIndex(name="ivfflat", distance_strategy=DISTANCE_STRATEGY)
    await vector_store.aapply_vector_index(ivfflat_index)

    latencies = []
    recalls = []
    try:
        # Explicit check instead of `assert`, which is removed under `python -O`.
        if not await vector_store.is_valid_index(ivfflat_index.name):
            raise RuntimeError(f"Index {ivfflat_index.name!r} is not valid.")
        print("IVFFLAT index created.")

        for i, query in enumerate(queries):
            ivfflat_docs, latency = await query_vector_with_timing(
                vector_store, query
            )
            latencies.append(latency)
            recalls.append(calculate_recall(knn_docs[i], ivfflat_docs))
    finally:
        # Always clean up so later runs start without a leftover index.
        await vector_store.adrop_vector_index(ivfflat_index.name)

    # calculate average recall & latency
    average_latency = sum(latencies) / len(latencies)
    average_recall = sum(recalls) / len(recalls)
    return average_latency, average_recall


async def knn_search(vector_store):
    """Run every benchmark query once and collect the baseline results.

    Each entry of the module-level ``queries`` list is searched in order,
    one at a time, via ``query_vector_with_timing``.

    Args:
        vector_store: Store passed through to ``query_vector_with_timing``.

    Returns:
        Tuple ``(knn_docs, average_latency)``: the per-query result lists in
        query order, and the mean latency in seconds.
    """
    timed_results = [
        await query_vector_with_timing(vector_store, text) for text in queries
    ]
    knn_docs = [found for found, _ in timed_results]
    total_latency = sum(elapsed for _, elapsed in timed_results)
    return knn_docs, total_latency / len(timed_results)


def calculate_recall(base, target) -> float:
    """Return the fraction of ``base`` documents that also appear in ``target``.

    Documents are matched on ``doc.metadata["id"]``; an id that occurs more
    than once within either collection is counted once.

    Args:
        base: Iterable of reference documents (the ground truth).
        target: Iterable of documents to score against ``base``.

    Returns:
        Size of the id intersection divided by the number of unique ids in
        ``base``. Returns ``0.0`` when ``base`` is empty, since there is
        nothing to recall and the ratio would otherwise divide by zero.
    """
    base_ids = {doc.metadata["id"] for doc in base}
    if not base_ids:
        return 0.0
    target_ids = {doc.metadata["id"] for doc in target}
    return len(base_ids & target_ids) / len(base_ids)


async def main():
    """Run the KNN, HNSW and IVFFlat benchmarks and print a summary.

    The un-indexed search runs first and its results serve as the baseline
    that both index benchmarks are scored against. One summary line is
    printed per search method.
    """
    store = await get_vector_store()
    baseline_docs, knn_latency = await knn_search(store)

    hnsw_latency, hnsw_recall = await hnsw_search(store, baseline_docs)
    ivfflat_latency, ivfflat_recall = await ivfflat_search(store, baseline_docs)

    summary = (
        f"KNN recall: 1.0            KNN latency: {knn_latency}",
        f"HNSW average recall: {hnsw_recall}          HNSW average latency: {hnsw_latency}",
        f"IVFFLAT average recall: {ivfflat_recall}    IVFFLAT latency: {ivfflat_latency}",
    )
    for line in summary:
        print(line)


# Run the benchmark only when this file is executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())