diff --git a/.github/workflows/contrib-tests.yml b/.github/workflows/contrib-tests.yml index f13bfdbb985..e257dc7f8ec 100644 --- a/.github/workflows/contrib-tests.yml +++ b/.github/workflows/contrib-tests.yml @@ -87,6 +87,10 @@ jobs: --health-retries 5 ports: - 5432:5432 + mongodb: + image: mongodb/mongodb-atlas-local:latest + ports: + - 27017:27017 steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} @@ -104,6 +108,9 @@ jobs: - name: Install pgvector when on linux run: | pip install -e .[retrievechat-pgvector] + - name: Install mongodb when on linux + run: | + pip install -e .[retrievechat-mongodb] - name: Install unstructured when python-version is 3.9 and on linux if: matrix.python-version == '3.9' run: | diff --git a/autogen/agentchat/contrib/vectordb/base.py b/autogen/agentchat/contrib/vectordb/base.py index 20b6376d01d..ae2e9332590 100644 --- a/autogen/agentchat/contrib/vectordb/base.py +++ b/autogen/agentchat/contrib/vectordb/base.py @@ -186,7 +186,8 @@ def get_docs_by_ids( ids: List[ItemID] | A list of document ids. If None, will return all the documents. Default is None. collection_name: str | The name of the collection. Default is None. include: List[str] | The fields to include. Default is None. - If None, will include ["metadatas", "documents"], ids will always be included. + If None, will include ["metadatas", "documents"], ids will always be included. This may differ + depending on the implementation. kwargs: dict | Additional keyword arguments. Returns: @@ -200,7 +201,8 @@ class VectorDBFactory: Factory class for creating vector databases. """ - PREDEFINED_VECTOR_DB = ["chroma", "pgvector", "qdrant"] + + PREDEFINED_VECTOR_DB = ["chroma", "pgvector", "mongodb", "qdrant"] @staticmethod def create_vector_db(db_type: str, **kwargs) -> VectorDB: @@ -222,6 +224,12 @@ def create_vector_db(db_type: str, **kwargs) -> VectorDB: from .pgvectordb import PGVectorDB return PGVectorDB(**kwargs) + + if db_type.lower() in ["mdb", "mongodb", "atlas"]: + from .mongodb import MongoDBAtlasVectorDB + + return MongoDBAtlasVectorDB(**kwargs) + if db_type.lower() in ["qdrant", "qdrantdb"]: from .qdrant import QdrantVectorDB diff --git a/autogen/agentchat/contrib/vectordb/mongodb.py b/autogen/agentchat/contrib/vectordb/mongodb.py new file mode 100644 index 00000000000..f6ccbe458cc --- /dev/null +++ b/autogen/agentchat/contrib/vectordb/mongodb.py @@ -0,0 +1,526 @@ +from copy import deepcopy +from importlib.metadata import version +from time import monotonic, sleep +from typing import Any, Callable, Dict, Iterable, List, Literal, Mapping, Set, Tuple, Union + +import numpy as np +from pymongo import MongoClient, UpdateOne, errors +from pymongo.collection import Collection +from pymongo.driver_info import DriverInfo +from pymongo.operations import SearchIndexModel +from sentence_transformers import SentenceTransformer + +from .base import Document, ItemID, QueryResults, VectorDB +from .utils import get_logger + +logger = get_logger(__name__) + +DEFAULT_INSERT_BATCH_SIZE = 100_000 +_SAMPLE_SENTENCE = ["The weather is lovely today in paradise."] +_TIMEOUT = 20.0 +_DELAY = 0.5 + + +def with_id_rename(docs: Iterable) -> List[Dict[str, Any]]: + """Utility changes _id field from Collection into id for Document.""" + return [{**{k: v for k, v in d.items() if k != "_id"}, "id": d["_id"]} for d in docs] + + +class MongoDBAtlasVectorDB(VectorDB): + """ + A Collection object for MongoDB. 
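+    Implements the VectorDB interface on top of MongoDB Atlas: documents and their embeddings are stored in a
+    MongoDB collection and queried through an Atlas Vector Search index.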
+ """ + + def __init__( + self, + connection_string: str = "", + database_name: str = "vector_db", + embedding_function: Callable = SentenceTransformer("all-MiniLM-L6-v2").encode, + collection_name: str = None, + index_name: str = "vector_index", + overwrite: bool = False, + wait_until_ready: bool = False, + ): + """ + Initialize the vector database. + + Args: + connection_string: str | The MongoDB connection string to connect to. Default is ''. + database_name: str | The name of the database. Default is 'vector_db'. + embedding_function: The embedding function used to generate the vector representation. + overwrite: bool | Overwrite existing collection with new information from this object + defaults to False + wait_until_ready: bool | Blocking call to wait until the database indexes are READY + will timeout after 20 seconds. Defaults to False + """ + self.embedding_function = embedding_function + self.index_name = index_name + self.overwrite = overwrite + self._wait_until_ready = wait_until_ready + + # This will get the model dimension size by computing the embeddings dimensions + self.dimensions = self._get_embedding_size() + + try: + self.client = MongoClient(connection_string, driver=DriverInfo(name="autogen")) + self.client.admin.command("ping") + logger.debug("Successfully created MongoClient") + except errors.ServerSelectionTimeoutError as err: + raise ConnectionError("Could not connect to MongoDB server") from err + + self.db = self.client[database_name] + logger.debug(f"Atlas Database name: {self.db.name}") + if collection_name: + self.active_collection = self.create_collection(collection_name, overwrite=self.overwrite) + else: + self.active_collection = None + + def _is_index_ready(self, collection: Collection, index_name: str): + """Check for the index name in the list of available search indexes to see if the + specified index is of status READY + + Args: + collection (Collection): MongoDB Collection to for the search indexes + index_name (str): Vector Search Index name + + Returns: + bool : True if the index is present and READY false otherwise + """ + for index in collection.list_search_indexes(index_name): + if index["type"] == "vectorSearch" and index["status"] == "READY": + return True + return False + + def _wait_for_index(self, collection: Collection, index_name: str, timeout=_TIMEOUT): + """Waits up to 20 seconds for the index to be created to be ready, otherwise + throws a TimeoutError""" + start = monotonic() + while monotonic() - start < timeout: + if self._is_index_ready(collection, index_name): + return + sleep(_DELAY) + raise TimeoutError(f"Index {self.index_name} is not ready!") + + def _get_embedding_size(self): + return len(self.embedding_function(_SAMPLE_SENTENCE)[0]) + + def list_collections(self): + """ + List the collections in the vector database. + + Returns: + List[str] | The list of collections. + """ + return self.db.list_collection_names() + + def create_collection( + self, + collection_name: str, + overwrite: bool = False, + ) -> Collection: + """ + Create a collection in the vector database and create a vector search index in the collection. + If collection already exists, return the existing collection. + + Args: + collection_name: str | The name of the collection. + overwrite: bool | Whether to overwrite the collection if it exists. Default is False. 
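+
+        Returns:
+            Collection | The collection object.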
+ """ + collection_exists = collection_name in self.db.list_collection_names() + + if collection_exists: + # Create a new collection + coll = self.db[collection_name] + if overwrite: + self.db.drop_collection(collection_name) + coll = self.db.create_collection(collection_name) + else: + coll = self.db.create_collection(collection_name) + + self.create_index_if_not_exists(index_name=self.index_name, collection=coll) + return coll + + def create_index_if_not_exists(self, index_name: str = "vector_index", collection: Collection = None) -> None: + """ + Creates a vector search index on the specified collection in MongoDB. + + Args: + MONGODB_INDEX (str, optional): The name of the vector search index to create. Defaults to "vector_search_index". + collection (Collection, optional): The MongoDB collection to create the index on. Defaults to None. + """ + if not self._is_index_ready(collection, index_name): + self.create_vector_search_index(collection, index_name) + + def get_collection(self, collection_name: str = None) -> Collection: + """ + Get the collection from the vector database. + + Args: + collection_name: str | The name of the collection. Default is None. If None, return the + current active collection. + + Returns: + Collection | The collection object. + """ + if collection_name is None: + if self.active_collection is None: + raise ValueError("No collection is specified.") + else: + logger.debug( + f"No collection is specified. Using current active collection {self.active_collection.name}." + ) + else: + self.active_collection = self.db[collection_name] + + return self.active_collection + + def delete_collection(self, collection_name: str) -> None: + """ + Delete the collection from the vector database. + + Args: + collection_name: str | The name of the collection. + """ + return self.db[collection_name].drop() + + def create_vector_search_index( + self, + collection: Collection, + index_name: Union[str, None] = "vector_index", + similarity: Literal["euclidean", "cosine", "dotProduct"] = "cosine", + ) -> None: + """Create a vector search index in the collection. + + Args: + collection: An existing Collection in the Atlas Database. + index_name: Vector Search Index name. + similarity: Algorithm used for measuring vector similarity. + kwargs: Additional keyword arguments. + + Returns: + None + """ + search_index_model = SearchIndexModel( + definition={ + "fields": [ + { + "type": "vector", + "numDimensions": self.dimensions, + "path": "embedding", + "similarity": similarity, + }, + ] + }, + name=index_name, + type="vectorSearch", + ) + # Create the search index + try: + collection.create_search_index(model=search_index_model) + if self._wait_until_ready: + self._wait_for_index(collection, index_name) + logger.debug(f"Search index {index_name} created successfully.") + except Exception as e: + logger.error( + f"Error creating search index: {e}. \n" + f"Your client must be connected to an Atlas cluster. " + f"You may have to manually create a Collection and Search Index " + f"if you are on a free/shared cluster." 
+ ) + raise e + + def upsert_docs(self, docs, collection): + for doc in docs: + query = {"id": doc["id"]} + doc["embedding"] = np.array(self.embedding_function([doc["content"]])).tolist()[0] + new_values = {"$set": doc} + collection.update_one(query, new_values, upsert=True) + + def insert_docs( + self, + docs: List[Document], + collection_name: str = None, + upsert: bool = False, + batch_size=DEFAULT_INSERT_BATCH_SIZE, + **kwargs, + ) -> None: + """Insert Documents and Vector Embeddings into the collection of the vector database. + + For large numbers of Documents, insertion is performed in batches. + + Args: + docs: List[Document] | A list of documents. Each document is a TypedDict `Document`. + collection_name: str | The name of the collection. Default is None. + upsert: bool | Whether to update the document if it exists. Default is False. + batch_size: Number of documents to be inserted in each batch + """ + if not docs: + logger.info("No documents to insert.") + return + + collection = self.get_collection(collection_name) + if upsert: + self.upsert_docs(docs, collection) + else: + # Sanity checking the first document + if docs[0].get("content") is None: + raise ValueError("The document content is required.") + if docs[0].get("id") is None: + raise ValueError("The document id is required.") + + input_ids = set() + result_ids = set() + id_batch = [] + text_batch = [] + metadata_batch = [] + size = 0 + i = 0 + for doc in docs: + id = doc["id"] + text = doc["content"] + metadata = doc.get("metadata", {}) + id_batch.append(id) + text_batch.append(text) + metadata_batch.append(metadata) + id_size = 1 if isinstance(id, int) else len(id) + size += len(text) + len(metadata) + id_size + if (i + 1) % batch_size == 0 or size >= 47_000_000: + result_ids.update(self._insert_batch(collection, text_batch, metadata_batch, id_batch)) + input_ids.update(id_batch) + id_batch = [] + text_batch = [] + metadata_batch = [] + size = 0 + i += 1 + if text_batch: + result_ids.update(self._insert_batch(collection, text_batch, metadata_batch, id_batch)) # type: ignore + input_ids.update(id_batch) + + if result_ids != input_ids: + logger.warning( + "Possible data corruption. " + "input_ids not in result_ids: {in_diff}.\n" + "result_ids not in input_ids: {out_diff}".format( + in_diff=input_ids.difference(result_ids), out_diff=result_ids.difference(input_ids) + ) + ) + + def _insert_batch( + self, collection: Collection, texts: List[str], metadatas: List[Mapping[str, Any]], ids: List[ItemID] + ) -> Set[ItemID]: + """Compute embeddings for and insert a batch of Documents into the Collection. + + For performance reasons, we chose to call self.embedding_function just once, + with the hopefully small tradeoff of having recreating Document dicts. + + Args: + collection: MongoDB Collection + texts: List of the main contents of each document + metadatas: List of metadata mappings + ids: List of ids. Note that these are stored as _id in Collection. + + Returns: + List of ids inserted. + """ + n_texts = len(texts) + if n_texts == 0: + return [] + # Embed and create the documents + embeddings = self.embedding_function(texts).tolist() + assert ( + len(embeddings) == n_texts + ), f"The number of embeddings produced by self.embedding_function ({len(embeddings)} does not match the number of texts provided to it ({n_texts})." 
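+        # Build the MongoDB documents, storing each Document id in the _id field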
+        to_insert = [
+            {"_id": i, "content": t, "metadata": m, "embedding": e}
+            for i, t, m, e in zip(ids, texts, metadatas, embeddings)
+        ]
+        # insert the documents in MongoDB Atlas
+        insert_result = collection.insert_many(to_insert)  # type: ignore
+        return insert_result.inserted_ids
+
+    def update_docs(self, docs: List[Document], collection_name: str = None, **kwargs: Any) -> None:
+        """Update documents, including their embeddings, in the Collection.
+
+        Optionally allow upsert as kwarg.
+
+        Uses deepcopy to avoid changing docs.
+
+        Args:
+            docs: List[Document] | A list of documents.
+            collection_name: str | The name of the collection. Default is None.
+            kwargs: Any | Use `upsert=True` to insert documents whose ids are not present in the collection.
+        """
+
+        n_docs = len(docs)
+        logger.info(f"Preparing to embed and update {n_docs=}")
+        # Compute the embeddings
+        embeddings = self.embedding_function([doc["content"] for doc in docs]).tolist()
+        # Prepare the updates
+        all_updates = []
+        for i in range(n_docs):
+            doc = deepcopy(docs[i])
+            doc["embedding"] = embeddings[i]
+            doc["_id"] = doc.pop("id")
+
+            all_updates.append(UpdateOne({"_id": doc["_id"]}, {"$set": doc}, upsert=kwargs.get("upsert", False)))
+        # Perform update in bulk
+        collection = self.get_collection(collection_name)
+        result = collection.bulk_write(all_updates)
+
+        # Log a result summary
+        logger.info(
+            "Matched: %s, Modified: %s, Upserted: %s",
+            result.matched_count,
+            result.modified_count,
+            result.upserted_count,
+        )
+
+    def delete_docs(self, ids: List[ItemID], collection_name: str = None, **kwargs):
+        """
+        Delete documents from the collection of the vector database.
+
+        Args:
+            ids: List[ItemID] | A list of document ids. Each id is a typed `ItemID`.
+            collection_name: str | The name of the collection. Default is None.
+        """
+        collection = self.get_collection(collection_name)
+        return collection.delete_many({"_id": {"$in": ids}})
+
+    def get_docs_by_ids(
+        self, ids: List[ItemID] = None, collection_name: str = None, include: List[str] = None, **kwargs
+    ) -> List[Document]:
+        """
+        Retrieve documents from the collection of the vector database based on the ids.
+
+        Args:
+            ids: List[ItemID] | A list of document ids. If None, will return all the documents. Default is None.
+            collection_name: str | The name of the collection. Default is None.
+            include: List[str] | The fields to include.
+                If None, will include ["metadata", "content"]; ids will always be included.
+                Use include to choose whether to also return the embedding and metadata fields.
+            kwargs: dict | Additional keyword arguments.
+
+        Returns:
+            List[Document] | The results.
+        """
+        if include is None:
+            include_fields = {"_id": 1, "content": 1, "metadata": 1}
+        else:
+            include_fields = {k: 1 for k in set(include).union({"_id"})}
+        collection = self.get_collection(collection_name)
+        if ids is not None:
+            docs = collection.find({"_id": {"$in": ids}}, include_fields)
+            # Return with the _id field from the Collection renamed to id for Document
+            return with_id_rename(docs)
+        else:
+            docs = collection.find({}, include_fields)
+            # Return with the _id field from the Collection renamed to id for Document
+            return with_id_rename(docs)
+
+    def retrieve_docs(
+        self,
+        queries: List[str],
+        collection_name: str = None,
+        n_results: int = 10,
+        distance_threshold: float = -1,
+        **kwargs,
+    ) -> QueryResults:
+        """
+        Retrieve documents from the collection of the vector database based on the queries.
+
+        Args:
+            queries: List[str] | A list of queries. Each query is a string.
+            collection_name: str | The name of the collection. Default is None.
+            n_results: int | The number of relevant documents to return. Default is 10.
+            distance_threshold: float | The threshold for the distance score; only documents with a distance
+                smaller than it will be returned. Don't filter with it if < 0. Default is -1.
+            wait_until_ready: bool | Will not execute the retrieval operation until the specified vector index is
+                ready to be queried. Default is False.
+            kwargs: Dict | Additional keyword arguments. Notable ones include:
+                oversampling_factor: int | This times n_results is 'ef' in the HNSW algorithm.
+                    It determines the number of nearest neighbor candidates to consider during the search phase.
+                    A higher value leads to more accuracy, but is slower. Default is 10.
+
+        Returns:
+            QueryResults | For each query string, a list of nearest documents and their scores.
+        """
+        collection = self.get_collection(collection_name)
+        # Trivial case of an empty collection
+        if collection.count_documents({}) == 0:
+            return []
+
+        # Check status of index!
+        if self._wait_until_ready:
+            self._wait_for_index(collection, self.index_name)
+            logger.info(f"Using index: {self.index_name}")
+        results = []
+        for query_text in queries:
+            # Compute embedding vector from semantic query
+            logger.info(f"Query: {query_text}")
+            query_vector = np.array(self.embedding_function([query_text])).tolist()[0]
+            # Find documents with similar vectors using the specified index
+            query_result = _vector_search(
+                query_vector,
+                n_results,
+                collection,
+                self.index_name,
+                distance_threshold,
+                **kwargs,
+                oversampling_factor=kwargs.get("oversampling_factor", 10),
+            )
+            # Change each _id key to id, as in with_id_rename, but for (doc, score) tuples
+            results.append(
+                [({**{k: v for k, v in d[0].items() if k != "_id"}, "id": d[0]["_id"]}, d[1]) for d in query_result]
+            )
+        return results
+
+
+def _vector_search(
+    embedding_vector: List[float],
+    n_results: int,
+    collection: Collection,
+    index_name: str,
+    distance_threshold: float = -1.0,
+    oversampling_factor=10,
+    include_embedding=False,
+) -> List[Tuple[Dict, float]]:
+    """Core $vectorSearch Aggregation pipeline.
+
+    Args:
+        embedding_vector: Embedding vector of the semantic query
+        n_results: Number of documents to return.
+        collection: MongoDB Collection with vector index
+        index_name: Name of the vector index
+        distance_threshold: Only distance measures smaller than this will be returned.
+            Don't filter with it if < 0. Default is -1.
+        oversampling_factor: int | This times n_results is 'ef' in the HNSW algorithm.
+            It determines the number of nearest neighbor candidates to consider during the search phase.
+            A higher value leads to more accuracy, but is slower. Default is 10.
+        include_embedding: bool | Whether to keep the embedding field in the returned documents. Default is False.
+
+    Returns:
+        List of tuples of length n_results from Collection.
+        Each tuple contains a document dict and a score.
+ """ + + pipeline = [ + { + "$vectorSearch": { + "index": index_name, + "limit": n_results, + "numCandidates": n_results * oversampling_factor, + "queryVector": embedding_vector, + "path": "embedding", + } + }, + {"$set": {"score": {"$meta": "vectorSearchScore"}}}, + ] + if distance_threshold >= 0.0: + similarity_threshold = 1.0 - distance_threshold + pipeline.append({"$match": {"score": {"$gte": similarity_threshold}}}) + + if not include_embedding: + pipeline.append({"$project": {"embedding": 0}}) + + logger.info("pipeline: %s", pipeline) + agg = collection.aggregate(pipeline) + return [(doc, doc.pop("score")) for doc in agg] diff --git a/notebook/agentchat_mongodb_RetrieveChat.ipynb b/notebook/agentchat_mongodb_RetrieveChat.ipynb new file mode 100644 index 00000000000..f1e696fe6e9 --- /dev/null +++ b/notebook/agentchat_mongodb_RetrieveChat.ipynb @@ -0,0 +1,680 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Using RetrieveChat Powered by MongoDB Atlas for Retrieve Augmented Code Generation and Question Answering\n", + "\n", + "AutoGen offers conversable agents powered by LLM, tool or human, which can be used to perform tasks collectively via automated chat. This framework allows tool use and human participation through multi-agent conversation.\n", + "Please find documentation about this feature [here](https://microsoft.github.io/autogen/docs/Use-Cases/agent_chat).\n", + "\n", + "RetrieveChat is a conversational system for retrieval-augmented code generation and question answering. In this notebook, we demonstrate how to utilize RetrieveChat to generate code and answer questions based on customized documentations that are not present in the LLM's training dataset. RetrieveChat uses the `RetrieveAssistantAgent` and `RetrieveUserProxyAgent`, which is similar to the usage of `AssistantAgent` and `UserProxyAgent` in other notebooks (e.g., [Automated Task Solving with Code Generation, Execution & Debugging](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_auto_feedback_from_code_execution.ipynb)). 
Essentially, `RetrieveAssistantAgent` and `RetrieveUserProxyAgent` implement a different auto-reply mechanism corresponding to the RetrieveChat prompts.\n", + "\n", + "## Table of Contents\n", + "We'll demonstrate six examples of using RetrieveChat for code generation and question answering:\n", + "\n", + "- [Example 1: Generate code based off docstrings w/o human feedback](#example-1)\n", + "\n", + "````{=mdx}\n", + ":::info Requirements\n", + "Some extra dependencies are needed for this notebook, which can be installed via pip:\n", + "\n", + "```bash\n", + "pip install pyautogen[retrievechat-mongodb] flaml[automl]\n", + "```\n", + "\n", + "For more information, please refer to the [installation guide](/docs/installation/).\n", + ":::\n", + "````\n", + "\n", + "Ensure you have a MongoDB Atlas instance.\n", + "\n", + "If not, a test version can quickly be deployed using Docker.\n", + "\n", + "`docker-compose.yml`\n", + "\n", + "```yml\n", + "version: '3.9'\n", + "\n", + "services:\n", + " mongodb:\n", + " image: mongodb/mongodb-atlas-local:latest\n", + " restart: unless-stopped\n", + " ports:\n", + " - \"27017:27017\"\n", + " environment:\n", + " MONGODB_INITDB_ROOT_USERNAME: mongodb_user\n", + " MONGODB_INITDB_ROOT_PASSWORD: mongodb_password\n", + "```\n", + "\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set your API Endpoint\n", + "\n", + "The [`config_list_from_json`](https://microsoft.github.io/autogen/docs/reference/oai/openai_utils#config_list_from_json) function loads a list of configurations from an environment variable or a json file.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "models to use: ['gpt-35-turbo']\n" + ] + } + ], + "source": [ + "import json\n", + "import os\n", + "\n", + "import autogen\n", + "from autogen.agentchat.contrib.retrieve_assistant_agent import RetrieveAssistantAgent\n", + "from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent\n", + "\n", + "# Accepted file formats for that can be stored in\n", + "# a vector database instance\n", + "from autogen.retrieve_utils import TEXT_FORMATS\n", + "\n", + "config_list = [\n", + " {\n", + " \"model\": \"gpt-35-turbo\",\n", + " \"base_url\": \"\",\n", + " \"api_type\": \"azure\",\n", + " \"api_version\": \"2023-07-01-preview\",\n", + " \"api_key\": \"\",\n", + " },\n", + "]\n", + "assert len(config_list) > 0\n", + "print(\"models to use: \", [config_list[i][\"model\"] for i in range(len(config_list))])" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "````{=mdx}\n", + ":::tip\n", + "Learn more about configuring LLMs for agents [here](/docs/topics/llm_configuration).\n", + ":::\n", + "````\n", + "\n", + "## Construct agents for RetrieveChat\n", + "\n", + "We start by initializing the `RetrieveAssistantAgent` and `RetrieveUserProxyAgent`. The system message needs to be set to \"You are a helpful assistant.\" for RetrieveAssistantAgent. The detailed instructions are given in the user message. Later we will use the `RetrieveUserProxyAgent.message_generator` to combine the instructions and a retrieval augmented generation task for an initial prompt to be sent to the LLM assistant." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accepted file formats for `docs_path`:\n", + "['txt', 'json', 'csv', 'tsv', 'md', 'html', 'htm', 'rtf', 'rst', 'jsonl', 'log', 'xml', 'yaml', 'yml', 'pdf']\n" + ] + } + ], + "source": [ + "print(\"Accepted file formats for `docs_path`:\")\n", + "print(TEXT_FORMATS)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# 1. create an RetrieveAssistantAgent instance named \"assistant\"\n", + "assistant = RetrieveAssistantAgent(\n", + " name=\"assistant\",\n", + " system_message=\"You are a helpful assistant.\",\n", + " llm_config={\n", + " \"timeout\": 600,\n", + " \"cache_seed\": 42,\n", + " \"config_list\": config_list,\n", + " },\n", + ")\n", + "\n", + "# 2. create the RetrieveUserProxyAgent instance named \"ragproxyagent\"\n", + "# By default, the human_input_mode is \"ALWAYS\", which means the agent will ask for human input at every step. We set it to \"NEVER\" here.\n", + "# `docs_path` is the path to the docs directory. It can also be the path to a single file, or the url to a single file. By default,\n", + "# it is set to None, which works only if the collection is already created.\n", + "# `task` indicates the kind of task we're working on. In this example, it's a `code` task.\n", + "# `chunk_token_size` is the chunk token size for the retrieve chat. By default, it is set to `max_tokens * 0.6`, here we set it to 2000.\n", + "# `custom_text_types` is a list of file types to be processed. Default is `autogen.retrieve_utils.TEXT_FORMATS`.\n", + "# This only applies to files under the directories in `docs_path`. Explicitly included files and urls will be chunked regardless of their types.\n", + "# In this example, we set it to [\"non-existent-type\"] to only process markdown files. Since no \"non-existent-type\" files are included in the `websit/docs`,\n", + "# no files there will be processed. 
However, the explicitly included urls will still be processed.\n", + "ragproxyagent = RetrieveUserProxyAgent(\n", + " name=\"ragproxyagent\",\n", + " human_input_mode=\"NEVER\",\n", + " max_consecutive_auto_reply=3,\n", + " retrieve_config={\n", + " \"task\": \"code\",\n", + " \"docs_path\": [\n", + " \"https://raw.githubusercontent.com/microsoft/FLAML/main/website/docs/Examples/Integrate%20-%20Spark.md\",\n", + " \"https://raw.githubusercontent.com/microsoft/FLAML/main/website/docs/Research.md\",\n", + " os.path.join(os.path.abspath(\"\"), \"..\", \"website\", \"docs\"),\n", + " ],\n", + " \"custom_text_types\": [\"non-existent-type\"],\n", + " \"chunk_token_size\": 2000,\n", + " \"model\": config_list[0][\"model\"],\n", + " \"vector_db\": \"mongodb\", # MongoDB Atlas database\n", + " \"collection_name\": \"demo_collection\",\n", + " \"db_config\": {\n", + " \"connection_string\": \"\", # MongoDB Atlas connection string\n", + " \"database_name\": \"\", # MongoDB Atlas database\n", + " \"index_name\":\"vector_index\",\n", + " },\n", + " \"get_or_create\": True, # set to False if you don't want to reuse an existing collection\n", + " \"overwrite\": True, # set to True if you want to overwrite an existing collection\n", + " },\n", + " code_execution_config=False, # set to False if you don't want to execute the code\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example 1\n", + "\n", + "[Back to top](#table-of-contents)\n", + "\n", + "Use RetrieveChat to help generate sample code and automatically run the code and fix errors if there is any.\n", + "\n", + "Problem: Which API should I use if I want to use FLAML for a classification task and I want to train the model in 30 seconds. Use spark to parallel the training. Force cancel jobs if time limit is reached." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Trying to create collection.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-07-01 08:50:43,934 - autogen.agentchat.contrib.vectordb.mongodb - INFO - Search index vector_index created successfully.\u001b[0m\n", + "2024-07-01 08:50:44,612 - autogen.agentchat.contrib.retrieve_user_proxy_agent - INFO - Found 2 chunks.\u001b[0m\n", + "2024-07-01 08:50:45,064 - autogen.agentchat.contrib.vectordb.mongodb - INFO - Using index: [{'id': '6682a6042cf0e270602c0fe1', 'name': 'vector_index', 'type': 'vectorSearch', 'status': 'READY', 'queryable': True, 'latestDefinitionVersion': {'version': 0, 'createdAt': datetime.datetime(2024, 7, 1, 12, 50, 12, 109000)}, 'latestDefinition': {'fields': [{'type': 'vector', 'numDimensions': 384, 'path': 'embedding', 'similarity': 'cosine'}]}, 'statusDetail': [{'hostname': 'shared-shard-00-search-6xag8e', 'status': 'READY', 'queryable': True, 'mainIndex': {'status': 'READY', 'queryable': True, 'definitionVersion': {'version': 0, 'createdAt': datetime.datetime(2024, 7, 1, 12, 50, 12)}, 'definition': {'fields': [{'type': 'vector', 'path': 'embedding', 'numDimensions': 384, 'similarity': 'cosine'}]}}}]}]\u001b[0m\n", + "2024-07-01 08:51:00,069 - autogen.agentchat.contrib.vectordb.mongodb - INFO - Query: How can I use FLAML to perform a classification task and use spark to do parallel training. 
Train 30 seconds and force cancel jobs if time limit is reached.\u001b[0m\n", + "2024-07-01 08:51:00,164 - autogen.agentchat.contrib.vectordb.mongodb - INFO - pipeline: [{'$vectorSearch': {'index': 'vector_index', 'limit': 20, 'numCandidates': 200, 'queryVector': [-0.08256451040506363, -0.07900252193212509, -0.05290786176919937, 0.021982736885547638, 0.046406690031290054, 0.027769701555371284, -0.02768588438630104, -0.020102187991142273, -0.05407266318798065, -0.061684805899858475, -0.03940979018807411, -0.029285598546266556, -0.1118478998541832, -0.03136416897177696, -0.04099257290363312, -0.07897000014781952, -0.02522769570350647, 0.043702732771635056, -0.030820483341813087, -0.041595760732889175, 0.10552595555782318, 0.0023172772489488125, 0.08983399718999863, 0.10865391790866852, -0.06146957352757454, 0.04154617711901665, 0.015428234823048115, 0.016568025574088097, 0.013623313046991825, -0.06059451401233673, 0.08428270369768143, 0.009563339874148369, -0.002620439976453781, 0.016997039318084717, -0.07201018929481506, -0.010901586152613163, -0.030768705531954765, -0.04398634657263756, -0.026716720312833786, -0.019298473373055458, 0.029043301939964294, -0.03137688338756561, -0.0516120120882988, -0.033414166420698166, 0.05385608226060867, -0.025596346706151962, -0.02077491395175457, -0.0634346529841423, 0.03223349153995514, 0.02784794755280018, -0.06079091876745224, -0.012161108665168285, -0.0933445394039154, -0.018985357135534286, -0.022000310942530632, 0.08059032261371613, 0.03905639797449112, 0.008981743827462196, -0.04856802150607109, -0.0195226538926363, -0.016003113240003586, -0.10165907442569733, -0.004733760375529528, 0.030122995376586914, -0.038355227559804916, 0.03839924931526184, -0.028533125296235085, 0.01822500303387642, 0.0707336813211441, -0.02592848241329193, 0.02241864986717701, 0.022557010874152184, 0.007257631979882717, 0.03511698544025421, 0.008497730828821659, 0.06233576685190201, 0.06869452446699142, 0.06520985811948776, -0.018009020015597343, 0.008016299456357956, -0.09440284222364426, -0.06914988905191422, -0.016991959884762764, -0.004849573597311974, 0.015289856120944023, -0.05368100106716156, -0.07648778706789017, 0.04355047643184662, -0.013986689038574696, 0.03536888584494591, 0.03178128972649574, 0.03904074802994728, 0.027542345225811005, 0.021311746910214424, -0.08981165289878845, 0.050620175898075104, 0.006543598137795925, 0.07310184836387634, -0.033499374985694885, -0.01851048693060875, -0.07171830534934998, -0.07049573212862015, -0.02946554869413376, 0.04081925004720688, -0.015752671286463737, -0.05440584942698479, -0.00638421019539237, -0.027693038806319237, -0.015809008851647377, -0.0794110968708992, 0.08307767659425735, -0.010127314366400242, 0.031197702512145042, -0.0325561985373497, 0.028586456552147865, 0.05326930806040764, -0.04397851228713989, -0.06359461694955826, 0.003676487598568201, 0.06998850405216217, -0.02999182790517807, 0.03461074084043503, 0.05651488155126572, -0.05784572660923004, 0.02231559529900551, -0.07732831686735153, -0.029416916891932487, 1.8518434945716996e-33, 0.0358523465692997, -0.002374001545831561, 0.009263500571250916, -0.05580880120396614, 0.030508413910865784, -0.037797845900058746, 0.01508091390132904, 0.02779262885451317, -0.04756521061062813, 0.010429342277348042, -0.005697719287127256, 0.03368696570396423, -0.014907917007803917, -0.02615354210138321, -0.05337945744395256, -0.08737822622060776, 0.04612358659505844, 0.016435381025075912, -0.03597096726298332, -0.06492944061756134, 0.11139646172523499, 
-0.04470240697264671, 0.013333962298929691, 0.06944458186626434, 0.04924115538597107, 0.021988168358802795, -0.0033458129037171602, -0.021327221766114235, 0.04618706554174423, 0.09092214703559875, -0.009819227270781994, 0.03574197739362717, -0.02589249238371849, 0.015359507873654366, 0.01923568733036518, 0.009884021244943142, -0.0687863752245903, 0.008688706904649734, 0.0003024878678843379, 0.006991893518716097, -0.07505182921886444, -0.045765507966279984, 0.005778071004897356, 0.0200499240309, -0.07049272209405899, -0.06168036535382271, 0.044801026582717896, 0.026470575481653214, 0.01803005486726761, 0.04355733096599579, 0.034672655165195465, -0.08011800795793533, 0.03965161740779877, 0.08112046867609024, 0.07237163931131363, 0.07554267346858978, -0.0966770201921463, 0.05703232064843178, 0.007653184700757265, 0.09404793381690979, 0.02874479629099369, 0.032439567148685455, -0.006544401869177818, 0.0747322142124176, -0.06779398024082184, -0.03769124671816826, 0.018574388697743416, -0.0027497054543346167, 0.05186106637120247, 0.045869190245866776, 0.052037931978702545, 0.00877095852047205, 0.00956355594098568, 0.06010708585381508, 0.07063814997673035, -0.05281956121325493, 0.11385682970285416, 0.0014734964352101088, -0.13000114262104034, 0.04160114377737045, 0.002756801201030612, -0.03354136645793915, -0.012316903099417686, -0.04667062684893608, -0.021649040281772614, 0.009122663177549839, 0.07305404543876648, 0.050488732755184174, 0.0037498027086257935, 0.06742933392524719, -0.09808871150016785, -0.02533995360136032, 0.07752660661935806, -0.008930775336921215, -0.020734407007694244, -8.718873943854186e-34, 0.030775681138038635, -0.04046367108821869, -0.07485030591487885, 0.06837300956249237, 0.03777360916137695, 0.03171695023775101, 0.038366734981536865, -0.009698187932372093, -0.06721752882003784, 0.03483430668711662, -0.03264770656824112, -0.004821446258574724, 0.017873667180538177, -0.01217806525528431, -0.06693356484174728, -0.042935941368341446, 0.07182027399539948, -0.023592444136738777, 0.010779321193695068, 0.03270953893661499, -0.03838556632399559, -0.010096886195242405, -0.058566078543663025, -0.06304068863391876, -0.013382021337747574, -0.011351224966347218, -0.08517401665449142, 0.007304960861802101, -0.04197632893919945, -0.008837309665977955, 0.000581165833864361, 0.009765408001840115, -0.02323746308684349, -0.07040572166442871, -0.0630621388554573, -0.01030951738357544, 0.07319610565900803, -0.002567168092355132, -0.00982675701379776, 0.08009836822748184, 0.06278694421052933, -0.053986601531505585, -0.13036444783210754, -0.05632428079843521, -0.012127791531383991, -0.00034488266101107, -0.05524465814232826, -0.019998280331492424, -0.041557829827070236, 0.07457990199327469, -0.004864905495196581, 0.0744631364941597, -0.038698967546224594, 0.11076352000236511, 0.08321533352136612, -0.1319902539253235, 0.05189663544297218, -0.08637715131044388, -0.047119464725255966, 0.0712425485253334, 0.038989413529634476, -0.06715074181556702, 0.0770900622010231, -0.016237575560808182, 0.16853967308998108, -0.003975923638790846, 0.11307050287723541, 0.07726389169692993, -0.028748558834195137, 0.04492560029029846, 0.0768602192401886, 0.0852692499756813, 0.021246735006570816, 0.11719376593828201, 0.0029091970063745975, -0.011192459613084793, -0.09389575570821762, 0.021549541503190994, -0.0055024465546011925, 0.032183919101953506, 0.0651387944817543, -0.0652405172586441, 0.03021097555756569, 0.1095665693283081, -0.02563057281076908, 0.05070950835943222, 0.09074468910694122, 0.08164751529693604, 
0.039858028292655945, -0.045717816799879074, -0.01968374475836754, -0.01942502148449421, 0.020252034068107605, 0.028495490550994873, -0.014108758419752121, -2.6071681702433125e-08, -0.004948799964040518, -0.03374723717570305, -0.006966953631490469, 0.04770921543240547, 0.060589514672756195, 0.039017271250486374, -0.06870992481708527, 0.04758283868432045, -0.04153140261769295, -0.009761914610862732, 0.05678777024149895, -0.024886248633265495, 0.08310353755950928, 0.04019981995224953, 0.04347654804587364, -0.016476230695843697, 0.02281028777360916, 0.044384729117155075, 0.012391419149935246, 0.03150279074907303, 0.03414358198642731, 0.023670021444559097, -0.035867370665073395, 0.00584121560677886, 0.03878429904580116, -0.03416749835014343, 0.0317315049469471, 0.014832393266260624, 0.06329585611820221, -0.07007385790348053, -0.11312873661518097, -0.0667077898979187, 0.031542230397462845, 0.03318323940038681, -0.05146196484565735, -0.04369741305708885, 0.030556850135326385, 0.05148332566022873, -0.09324397146701813, 0.08804989606142044, -0.05473781377077103, 0.02356131188571453, -0.0072563826106488705, -0.013308629393577576, 0.022258494049310684, 0.047823697328567505, -0.014027439057826996, -0.018331162631511688, -0.02744504064321518, 0.027262693271040916, -0.03694259002804756, 0.04492212459445, 0.04835069552063942, 0.09086570143699646, -0.0022586847189813852, -0.03940355032682419, -0.005774456076323986, -0.06551025062799454, -0.04700932279229164, -0.00200175354257226, -0.039275478571653366, -0.04998438432812691, -0.08698498457670212, 0.015872927382588387], 'path': 'embedding'}}, {'$set': {'score': {'$meta': 'vectorSearchScore'}}}]\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "query_text How can I use FLAML to perform a classification task and use spark to do parallel training. 
Train 30 seconds and force cancel jobs if time limit is reached.\n", + "pipeline: [{'$vectorSearch': {'index': 'vector_index', 'limit': 20, 'numCandidates': 200, 'queryVector': [-0.08256451040506363, -0.07900252193212509, -0.05290786176919937, 0.021982736885547638, 0.046406690031290054, 0.027769701555371284, -0.02768588438630104, -0.020102187991142273, -0.05407266318798065, -0.061684805899858475, -0.03940979018807411, -0.029285598546266556, -0.1118478998541832, -0.03136416897177696, -0.04099257290363312, -0.07897000014781952, -0.02522769570350647, 0.043702732771635056, -0.030820483341813087, -0.041595760732889175, 0.10552595555782318, 0.0023172772489488125, 0.08983399718999863, 0.10865391790866852, -0.06146957352757454, 0.04154617711901665, 0.015428234823048115, 0.016568025574088097, 0.013623313046991825, -0.06059451401233673, 0.08428270369768143, 0.009563339874148369, -0.002620439976453781, 0.016997039318084717, -0.07201018929481506, -0.010901586152613163, -0.030768705531954765, -0.04398634657263756, -0.026716720312833786, -0.019298473373055458, 0.029043301939964294, -0.03137688338756561, -0.0516120120882988, -0.033414166420698166, 0.05385608226060867, -0.025596346706151962, -0.02077491395175457, -0.0634346529841423, 0.03223349153995514, 0.02784794755280018, -0.06079091876745224, -0.012161108665168285, -0.0933445394039154, -0.018985357135534286, -0.022000310942530632, 0.08059032261371613, 0.03905639797449112, 0.008981743827462196, -0.04856802150607109, -0.0195226538926363, -0.016003113240003586, -0.10165907442569733, -0.004733760375529528, 0.030122995376586914, -0.038355227559804916, 0.03839924931526184, -0.028533125296235085, 0.01822500303387642, 0.0707336813211441, -0.02592848241329193, 0.02241864986717701, 0.022557010874152184, 0.007257631979882717, 0.03511698544025421, 0.008497730828821659, 0.06233576685190201, 0.06869452446699142, 0.06520985811948776, -0.018009020015597343, 0.008016299456357956, -0.09440284222364426, -0.06914988905191422, -0.016991959884762764, -0.004849573597311974, 0.015289856120944023, -0.05368100106716156, -0.07648778706789017, 0.04355047643184662, -0.013986689038574696, 0.03536888584494591, 0.03178128972649574, 0.03904074802994728, 0.027542345225811005, 0.021311746910214424, -0.08981165289878845, 0.050620175898075104, 0.006543598137795925, 0.07310184836387634, -0.033499374985694885, -0.01851048693060875, -0.07171830534934998, -0.07049573212862015, -0.02946554869413376, 0.04081925004720688, -0.015752671286463737, -0.05440584942698479, -0.00638421019539237, -0.027693038806319237, -0.015809008851647377, -0.0794110968708992, 0.08307767659425735, -0.010127314366400242, 0.031197702512145042, -0.0325561985373497, 0.028586456552147865, 0.05326930806040764, -0.04397851228713989, -0.06359461694955826, 0.003676487598568201, 0.06998850405216217, -0.02999182790517807, 0.03461074084043503, 0.05651488155126572, -0.05784572660923004, 0.02231559529900551, -0.07732831686735153, -0.029416916891932487, 1.8518434945716996e-33, 0.0358523465692997, -0.002374001545831561, 0.009263500571250916, -0.05580880120396614, 0.030508413910865784, -0.037797845900058746, 0.01508091390132904, 0.02779262885451317, -0.04756521061062813, 0.010429342277348042, -0.005697719287127256, 0.03368696570396423, -0.014907917007803917, -0.02615354210138321, -0.05337945744395256, -0.08737822622060776, 0.04612358659505844, 0.016435381025075912, -0.03597096726298332, -0.06492944061756134, 0.11139646172523499, -0.04470240697264671, 0.013333962298929691, 0.06944458186626434, 0.04924115538597107, 0.021988168358802795, 
-0.0033458129037171602, -0.021327221766114235, 0.04618706554174423, 0.09092214703559875, -0.009819227270781994, 0.03574197739362717, -0.02589249238371849, 0.015359507873654366, 0.01923568733036518, 0.009884021244943142, -0.0687863752245903, 0.008688706904649734, 0.0003024878678843379, 0.006991893518716097, -0.07505182921886444, -0.045765507966279984, 0.005778071004897356, 0.0200499240309, -0.07049272209405899, -0.06168036535382271, 0.044801026582717896, 0.026470575481653214, 0.01803005486726761, 0.04355733096599579, 0.034672655165195465, -0.08011800795793533, 0.03965161740779877, 0.08112046867609024, 0.07237163931131363, 0.07554267346858978, -0.0966770201921463, 0.05703232064843178, 0.007653184700757265, 0.09404793381690979, 0.02874479629099369, 0.032439567148685455, -0.006544401869177818, 0.0747322142124176, -0.06779398024082184, -0.03769124671816826, 0.018574388697743416, -0.0027497054543346167, 0.05186106637120247, 0.045869190245866776, 0.052037931978702545, 0.00877095852047205, 0.00956355594098568, 0.06010708585381508, 0.07063814997673035, -0.05281956121325493, 0.11385682970285416, 0.0014734964352101088, -0.13000114262104034, 0.04160114377737045, 0.002756801201030612, -0.03354136645793915, -0.012316903099417686, -0.04667062684893608, -0.021649040281772614, 0.009122663177549839, 0.07305404543876648, 0.050488732755184174, 0.0037498027086257935, 0.06742933392524719, -0.09808871150016785, -0.02533995360136032, 0.07752660661935806, -0.008930775336921215, -0.020734407007694244, -8.718873943854186e-34, 0.030775681138038635, -0.04046367108821869, -0.07485030591487885, 0.06837300956249237, 0.03777360916137695, 0.03171695023775101, 0.038366734981536865, -0.009698187932372093, -0.06721752882003784, 0.03483430668711662, -0.03264770656824112, -0.004821446258574724, 0.017873667180538177, -0.01217806525528431, -0.06693356484174728, -0.042935941368341446, 0.07182027399539948, -0.023592444136738777, 0.010779321193695068, 0.03270953893661499, -0.03838556632399559, -0.010096886195242405, -0.058566078543663025, -0.06304068863391876, -0.013382021337747574, -0.011351224966347218, -0.08517401665449142, 0.007304960861802101, -0.04197632893919945, -0.008837309665977955, 0.000581165833864361, 0.009765408001840115, -0.02323746308684349, -0.07040572166442871, -0.0630621388554573, -0.01030951738357544, 0.07319610565900803, -0.002567168092355132, -0.00982675701379776, 0.08009836822748184, 0.06278694421052933, -0.053986601531505585, -0.13036444783210754, -0.05632428079843521, -0.012127791531383991, -0.00034488266101107, -0.05524465814232826, -0.019998280331492424, -0.041557829827070236, 0.07457990199327469, -0.004864905495196581, 0.0744631364941597, -0.038698967546224594, 0.11076352000236511, 0.08321533352136612, -0.1319902539253235, 0.05189663544297218, -0.08637715131044388, -0.047119464725255966, 0.0712425485253334, 0.038989413529634476, -0.06715074181556702, 0.0770900622010231, -0.016237575560808182, 0.16853967308998108, -0.003975923638790846, 0.11307050287723541, 0.07726389169692993, -0.028748558834195137, 0.04492560029029846, 0.0768602192401886, 0.0852692499756813, 0.021246735006570816, 0.11719376593828201, 0.0029091970063745975, -0.011192459613084793, -0.09389575570821762, 0.021549541503190994, -0.0055024465546011925, 0.032183919101953506, 0.0651387944817543, -0.0652405172586441, 0.03021097555756569, 0.1095665693283081, -0.02563057281076908, 0.05070950835943222, 0.09074468910694122, 0.08164751529693604, 0.039858028292655945, -0.045717816799879074, -0.01968374475836754, -0.01942502148449421, 
0.020252034068107605, 0.028495490550994873, -0.014108758419752121, -2.6071681702433125e-08, -0.004948799964040518, -0.03374723717570305, -0.006966953631490469, 0.04770921543240547, 0.060589514672756195, 0.039017271250486374, -0.06870992481708527, 0.04758283868432045, -0.04153140261769295, -0.009761914610862732, 0.05678777024149895, -0.024886248633265495, 0.08310353755950928, 0.04019981995224953, 0.04347654804587364, -0.016476230695843697, 0.02281028777360916, 0.044384729117155075, 0.012391419149935246, 0.03150279074907303, 0.03414358198642731, 0.023670021444559097, -0.035867370665073395, 0.00584121560677886, 0.03878429904580116, -0.03416749835014343, 0.0317315049469471, 0.014832393266260624, 0.06329585611820221, -0.07007385790348053, -0.11312873661518097, -0.0667077898979187, 0.031542230397462845, 0.03318323940038681, -0.05146196484565735, -0.04369741305708885, 0.030556850135326385, 0.05148332566022873, -0.09324397146701813, 0.08804989606142044, -0.05473781377077103, 0.02356131188571453, -0.0072563826106488705, -0.013308629393577576, 0.022258494049310684, 0.047823697328567505, -0.014027439057826996, -0.018331162631511688, -0.02744504064321518, 0.027262693271040916, -0.03694259002804756, 0.04492212459445, 0.04835069552063942, 0.09086570143699646, -0.0022586847189813852, -0.03940355032682419, -0.005774456076323986, -0.06551025062799454, -0.04700932279229164, -0.00200175354257226, -0.039275478571653366, -0.04998438432812691, -0.08698498457670212, 0.015872927382588387], 'path': 'embedding'}}, {'$set': {'score': {'$meta': 'vectorSearchScore'}}}]\n", + "VectorDB returns doc_ids: [[ObjectId('6682a624b9758026836ffab5'), ObjectId('6682a624b9758026836ffad5')]]\n", + "\u001b[32mAdding content of doc 6682a624b9758026836ffab5 to context.\u001b[0m\n", + "\u001b[32mAdding content of doc 6682a624b9758026836ffad5 to context.\u001b[0m\n", + "\u001b[33mragproxyagent\u001b[0m (to assistant):\n", + "\n", + "You're a retrieve augmented coding assistant. You answer user's questions based on your own knowledge and the\n", + "context provided by the user.\n", + "If you can't answer the question with or without the current context, you should reply exactly `UPDATE CONTEXT`.\n", + "For code generation, you must obey the following rules:\n", + "Rule 1. You MUST NOT install any packages because all the packages needed are already installed.\n", + "Rule 2. You must follow the formats below to write your code:\n", + "```language\n", + "# your code\n", + "```\n", + "\n", + "User's question is: How can I use FLAML to perform a classification task and use spark to do parallel training. Train 30 seconds and force cancel jobs if time limit is reached.\n", + "\n", + "Context is: # Integrate - Spark\n", + "\n", + "FLAML has integrated Spark for distributed training. There are two main aspects of integration with Spark:\n", + "\n", + "- Use Spark ML estimators for AutoML.\n", + "- Use Spark to run training in parallel spark jobs.\n", + "\n", + "## Spark ML Estimators\n", + "\n", + "FLAML integrates estimators based on Spark ML models. These models are trained in parallel using Spark, so we called them Spark estimators. To use these models, you first need to organize your data in the required format.\n", + "\n", + "### Data\n", + "\n", + "For Spark estimators, AutoML only consumes Spark data. 
FLAML provides a convenient function `to_pandas_on_spark` in the `flaml.automl.spark.utils` module to convert your data into a pandas-on-spark (`pyspark.pandas`) dataframe/series, which Spark estimators require.\n", + "\n", + "This utility function takes data in the form of a `pandas.Dataframe` or `pyspark.sql.Dataframe` and converts it into a pandas-on-spark dataframe. It also takes `pandas.Series` or `pyspark.sql.Dataframe` and converts it into a [pandas-on-spark](https://spark.apache.org/docs/latest/api/python/user_guide/pandas_on_spark/index.html) series. If you pass in a `pyspark.pandas.Dataframe`, it will not make any changes.\n", + "\n", + "This function also accepts optional arguments `index_col` and `default_index_type`.\n", + "\n", + "- `index_col` is the column name to use as the index, default is None.\n", + "- `default_index_type` is the default index type, default is \"distributed-sequence\". More info about default index type could be found on Spark official [documentation](https://spark.apache.org/docs/latest/api/python/user_guide/pandas_on_spark/options.html#default-index-type)\n", + "\n", + "Here is an example code snippet for Spark Data:\n", + "\n", + "```python\n", + "import pandas as pd\n", + "from flaml.automl.spark.utils import to_pandas_on_spark\n", + "\n", + "# Creating a dictionary\n", + "data = {\n", + " \"Square_Feet\": [800, 1200, 1800, 1500, 850],\n", + " \"Age_Years\": [20, 15, 10, 7, 25],\n", + " \"Price\": [100000, 200000, 300000, 240000, 120000],\n", + "}\n", + "\n", + "# Creating a pandas DataFrame\n", + "dataframe = pd.DataFrame(data)\n", + "label = \"Price\"\n", + "\n", + "# Convert to pandas-on-spark dataframe\n", + "psdf = to_pandas_on_spark(dataframe)\n", + "```\n", + "\n", + "To use Spark ML models you need to format your data appropriately. Specifically, use [`VectorAssembler`](https://spark.apache.org/docs/latest/api/python/reference/api/pyspark.ml.feature.VectorAssembler.html) to merge all feature columns into a single vector column.\n", + "\n", + "Here is an example of how to use it:\n", + "\n", + "```python\n", + "from pyspark.ml.feature import VectorAssembler\n", + "\n", + "columns = psdf.columns\n", + "feature_cols = [col for col in columns if col != label]\n", + "featurizer = VectorAssembler(inputCols=feature_cols, outputCol=\"features\")\n", + "psdf = featurizer.transform(psdf.to_spark(index_col=\"index\"))[\"index\", \"features\"]\n", + "```\n", + "\n", + "Later in conducting the experiment, use your pandas-on-spark data like non-spark data and pass them using `X_train, y_train` or `dataframe, label`.\n", + "\n", + "### Estimators\n", + "\n", + "#### Model List\n", + "\n", + "- `lgbm_spark`: The class for fine-tuning Spark version LightGBM models, using [SynapseML](https://microsoft.github.io/SynapseML/docs/features/lightgbm/about/) API.\n", + "\n", + "#### Usage\n", + "\n", + "First, prepare your data in the required format as described in the previous section.\n", + "\n", + "By including the models you intend to try in the `estimators_list` argument to `flaml.automl`, FLAML will start trying configurations for these models. 
If your input is Spark data, FLAML will also use estimators with the `_spark` postfix by default, even if you haven't specified them.\n", + "\n", + "Here is an example code snippet using SparkML models in AutoML:\n", + "\n", + "```python\n", + "import flaml\n", + "\n", + "# prepare your data in pandas-on-spark format as we previously mentioned\n", + "\n", + "automl = flaml.AutoML()\n", + "settings = {\n", + " \"time_budget\": 30,\n", + " \"metric\": \"r2\",\n", + " \"estimator_list\": [\"lgbm_spark\"], # this setting is optional\n", + " \"task\": \"regression\",\n", + "}\n", + "\n", + "automl.fit(\n", + " dataframe=psdf,\n", + " label=label,\n", + " **settings,\n", + ")\n", + "```\n", + "\n", + "[Link to notebook](https://github.com/microsoft/FLAML/blob/main/notebook/automl_bankrupt_synapseml.ipynb) | [Open in colab](https://colab.research.google.com/github/microsoft/FLAML/blob/main/notebook/automl_bankrupt_synapseml.ipynb)\n", + "\n", + "## Parallel Spark Jobs\n", + "\n", + "You can activate Spark as the parallel backend during parallel tuning in both [AutoML](/docs/Use-Cases/Task-Oriented-AutoML#parallel-tuning) and [Hyperparameter Tuning](/docs/Use-Cases/Tune-User-Defined-Function#parallel-tuning), by setting the `use_spark` to `true`. FLAML will dispatch your job to the distributed Spark backend using [`joblib-spark`](https://github.com/joblib/joblib-spark).\n", + "\n", + "Please note that you should not set `use_spark` to `true` when applying AutoML and Tuning for Spark Data. This is because only SparkML models will be used for Spark Data in AutoML and Tuning. As SparkML models run in parallel, there is no need to distribute them with `use_spark` again.\n", + "\n", + "All the Spark-related arguments are stated below. These arguments are available in both Hyperparameter Tuning and AutoML:\n", + "\n", + "- `use_spark`: boolean, default=False | Whether to use spark to run the training in parallel spark jobs. This can be used to accelerate training on large models and large datasets, but will incur more overhead in time and thus slow down training in some cases. GPU training is not supported yet when use_spark is True. For Spark clusters, by default, we will launch one trial per executor. However, sometimes we want to launch more trials than the number of executors (e.g., local mode). In this case, we can set the environment variable `FLAML_MAX_CONCURRENT` to override the detected `num_executors`. The final number of concurrent trials will be the minimum of `n_concurrent_trials` and `num_executors`.\n", + "- `n_concurrent_trials`: int, default=1 | The number of concurrent trials. When n_concurrent_trials > 1, FLAML performes parallel tuning.\n", + "- `force_cancel`: boolean, default=False | Whether to forcely cancel Spark jobs if the search time exceeded the time budget. 
Spark jobs include parallel tuning jobs and Spark-based model training jobs.\n", + "\n", + "An example code snippet for using parallel Spark jobs:\n", + "\n", + "```python\n", + "import flaml\n", + "\n", + "automl_experiment = flaml.AutoML()\n", + "automl_settings = {\n", + " \"time_budget\": 30,\n", + " \"metric\": \"r2\",\n", + " \"task\": \"regression\",\n", + " \"n_concurrent_trials\": 2,\n", + " \"use_spark\": True,\n", + " \"force_cancel\": True, # Activating the force_cancel option can immediately halt Spark jobs once they exceed the allocated time_budget.\n", + "}\n", + "\n", + "automl.fit(\n", + " dataframe=dataframe,\n", + " label=label,\n", + " **automl_settings,\n", + ")\n", + "```\n", + "\n", + "[Link to notebook](https://github.com/microsoft/FLAML/blob/main/notebook/integrate_spark.ipynb) | [Open in colab](https://colab.research.google.com/github/microsoft/FLAML/blob/main/notebook/integrate_spark.ipynb)\n", + "# Research\n", + "\n", + "For technical details, please check our research publications.\n", + "\n", + "- [FLAML: A Fast and Lightweight AutoML Library](https://www.microsoft.com/en-us/research/publication/flaml-a-fast-and-lightweight-automl-library/). Chi Wang, Qingyun Wu, Markus Weimer, Erkang Zhu. MLSys 2021.\n", + "\n", + "```bibtex\n", + "@inproceedings{wang2021flaml,\n", + " title={FLAML: A Fast and Lightweight AutoML Library},\n", + " author={Chi Wang and Qingyun Wu and Markus Weimer and Erkang Zhu},\n", + " year={2021},\n", + " booktitle={MLSys},\n", + "}\n", + "```\n", + "\n", + "- [Frugal Optimization for Cost-related Hyperparameters](https://arxiv.org/abs/2005.01571). Qingyun Wu, Chi Wang, Silu Huang. AAAI 2021.\n", + "\n", + "```bibtex\n", + "@inproceedings{wu2021cfo,\n", + " title={Frugal Optimization for Cost-related Hyperparameters},\n", + " author={Qingyun Wu and Chi Wang and Silu Huang},\n", + " year={2021},\n", + " booktitle={AAAI},\n", + "}\n", + "```\n", + "\n", + "- [Economical Hyperparameter Optimization With Blended Search Strategy](https://www.microsoft.com/en-us/research/publication/economical-hyperparameter-optimization-with-blended-search-strategy/). Chi Wang, Qingyun Wu, Silu Huang, Amin Saied. ICLR 2021.\n", + "\n", + "```bibtex\n", + "@inproceedings{wang2021blendsearch,\n", + " title={Economical Hyperparameter Optimization With Blended Search Strategy},\n", + " author={Chi Wang and Qingyun Wu and Silu Huang and Amin Saied},\n", + " year={2021},\n", + " booktitle={ICLR},\n", + "}\n", + "```\n", + "\n", + "- [An Empirical Study on Hyperparameter Optimization for Fine-Tuning Pre-trained Language Models](https://aclanthology.org/2021.acl-long.178.pdf). Susan Xueqing Liu, Chi Wang. ACL 2021.\n", + "\n", + "```bibtex\n", + "@inproceedings{liuwang2021hpolm,\n", + " title={An Empirical Study on Hyperparameter Optimization for Fine-Tuning Pre-trained Language Models},\n", + " author={Susan Xueqing Liu and Chi Wang},\n", + " year={2021},\n", + " booktitle={ACL},\n", + "}\n", + "```\n", + "\n", + "- [ChaCha for Online AutoML](https://www.microsoft.com/en-us/research/publication/chacha-for-online-automl/). Qingyun Wu, Chi Wang, John Langford, Paul Mineiro and Marco Rossi. ICML 2021.\n", + "\n", + "```bibtex\n", + "@inproceedings{wu2021chacha,\n", + " title={ChaCha for Online AutoML},\n", + " author={Qingyun Wu and Chi Wang and John Langford and Paul Mineiro and Marco Rossi},\n", + " year={2021},\n", + " booktitle={ICML},\n", + "}\n", + "```\n", + "\n", + "- [Fair AutoML](https://arxiv.org/abs/2111.06495). Qingyun Wu, Chi Wang. 
ArXiv preprint arXiv:2111.06495 (2021).\n", + "\n", + "```bibtex\n", + "@inproceedings{wuwang2021fairautoml,\n", + " title={Fair AutoML},\n", + " author={Qingyun Wu and Chi Wang},\n", + " year={2021},\n", + " booktitle={ArXiv preprint arXiv:2111.06495},\n", + "}\n", + "```\n", + "\n", + "- [Mining Robust Default Configurations for Resource-constrained AutoML](https://arxiv.org/abs/2202.09927). Moe Kayali, Chi Wang. ArXiv preprint arXiv:2202.09927 (2022).\n", + "\n", + "```bibtex\n", + "@inproceedings{kayaliwang2022default,\n", + " title={Mining Robust Default Configurations for Resource-constrained AutoML},\n", + " author={Moe Kayali and Chi Wang},\n", + " year={2022},\n", + " booktitle={ArXiv preprint arXiv:2202.09927},\n", + "}\n", + "```\n", + "\n", + "- [Targeted Hyperparameter Optimization with Lexicographic Preferences Over Multiple Objectives](https://openreview.net/forum?id=0Ij9_q567Ma). Shaokun Zhang, Feiran Jia, Chi Wang, Qingyun Wu. ICLR 2023 (notable-top-5%).\n", + "\n", + "```bibtex\n", + "@inproceedings{zhang2023targeted,\n", + " title={Targeted Hyperparameter Optimization with Lexicographic Preferences Over Multiple Objectives},\n", + " author={Shaokun Zhang and Feiran Jia and Chi Wang and Qingyun Wu},\n", + " booktitle={International Conference on Learning Representations},\n", + " year={2023},\n", + " url={https://openreview.net/forum?id=0Ij9_q567Ma},\n", + "}\n", + "```\n", + "\n", + "- [Cost-Effective Hyperparameter Optimization for Large Language Model Generation Inference](https://arxiv.org/abs/2303.04673). Chi Wang, Susan Xueqing Liu, Ahmed H. Awadallah. ArXiv preprint arXiv:2303.04673 (2023).\n", + "\n", + "```bibtex\n", + "@inproceedings{wang2023EcoOptiGen,\n", + " title={Cost-Effective Hyperparameter Optimization for Large Language Model Generation Inference},\n", + " author={Chi Wang and Susan Xueqing Liu and Ahmed H. Awadallah},\n", + " year={2023},\n", + " booktitle={ArXiv preprint arXiv:2303.04673},\n", + "}\n", + "```\n", + "\n", + "- [An Empirical Study on Challenging Math Problem Solving with GPT-4](https://arxiv.org/abs/2306.01337). Yiran Wu, Feiran Jia, Shaokun Zhang, Hangyu Li, Erkang Zhu, Yue Wang, Yin Tat Lee, Richard Peng, Qingyun Wu, Chi Wang. ArXiv preprint arXiv:2306.01337 (2023).\n", + "\n", + "```bibtex\n", + "@inproceedings{wu2023empirical,\n", + " title={An Empirical Study on Challenging Math Problem Solving with GPT-4},\n", + " author={Yiran Wu and Feiran Jia and Shaokun Zhang and Hangyu Li and Erkang Zhu and Yue Wang and Yin Tat Lee and Richard Peng and Qingyun Wu and Chi Wang},\n", + " year={2023},\n", + " booktitle={ArXiv preprint arXiv:2306.01337},\n", + "}\n", + "```\n", + "\n", + "\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", + "To disable this warning, you can either:\n", + "\t- Avoid using `tokenizers` before the fork if possible\n", + "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33massistant\u001b[0m (to ragproxyagent):\n", + "\n", + "You can activate Spark as the parallel backend during parallel tuning in both AutoML and Hyperparameter Tuning, by setting the `use_spark` to `true`. 
FLAML will dispatch your job to the distributed Spark backend using joblib-spark. According to the documentation, to use FLAML with Spark, you need to prepare your data in pandas-on-spark format using the `flaml.automl.spark.utils.to_pandas_on_spark` function in the `flaml.automl.spark.utils` module. Then, you can pass pandas-on-spark data to FLAML as normal data using `dataframe` and `label`. For example, to use SparkML models for regression and train for 30 seconds with force cancel, you can use the following code snippet:\n", + "\n", + "```python\n", + "import flaml\n", + "from flaml.automl.spark.utils import to_pandas_on_spark\n", + "\n", + "# load your data into a pandas dataframe\n", + "train_data = ...\n", + "\n", + "psdf = to_pandas_on_spark(train_data)\n", + "\n", + "automl = flaml.AutoML()\n", + "settings = {\n", + " \"time_budget\": 30,\n", + " \"metric\": \"r2\",\n", + " \"task\": \"regression\",\n", + " \"use_spark\": True,\n", + " \"force_cancel\": True,\n", + "}\n", + "\n", + "automl.fit(\n", + " dataframe=psdf,\n", + " label=label_name,\n", + " **settings,\n", + ")\n", + "```\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33mragproxyagent\u001b[0m (to assistant):\n", + "\n", + "\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33massistant\u001b[0m (to ragproxyagent):\n", + "\n", + "UPDATE CONTEXT. Please provide more information or context for me to assist you better.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[32mUpdating context and resetting conversation.\u001b[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-07-01 08:51:06,587 - autogen.agentchat.contrib.vectordb.mongodb - INFO - Using index: [{'id': '6682a6042cf0e270602c0fe1', 'name': 'vector_index', 'type': 'vectorSearch', 'status': 'READY', 'queryable': True, 'latestDefinitionVersion': {'version': 0, 'createdAt': datetime.datetime(2024, 7, 1, 12, 50, 12, 109000)}, 'latestDefinition': {'fields': [{'type': 'vector', 'numDimensions': 384, 'path': 'embedding', 'similarity': 'cosine'}]}, 'statusDetail': [{'hostname': 'shared-shard-00-search-6xag8e', 'status': 'READY', 'queryable': True, 'mainIndex': {'status': 'READY', 'queryable': True, 'definitionVersion': {'version': 0, 'createdAt': datetime.datetime(2024, 7, 1, 12, 50, 12)}, 'definition': {'fields': [{'type': 'vector', 'path': 'embedding', 'numDimensions': 384, 'similarity': 'cosine'}]}}}, {'hostname': 'shared-shard-00-search-onamml', 'status': 'READY', 'queryable': True, 'mainIndex': {'status': 'READY', 'queryable': True, 'definitionVersion': {'version': 0, 'createdAt': datetime.datetime(2024, 7, 1, 12, 50, 12)}, 'definition': {'fields': [{'type': 'vector', 'path': 'embedding', 'numDimensions': 384, 'similarity': 'cosine'}]}}}]}]\u001b[0m\n", + "2024-07-01 08:51:21,592 - autogen.agentchat.contrib.vectordb.mongodb - INFO - Query: How can I use FLAML to perform a classification task and use spark to do parallel training. 
Train 30 seconds and force cancel jobs if time limit is reached.\u001b[0m\n", + "2024-07-01 08:51:21,690 - autogen.agentchat.contrib.vectordb.mongodb - INFO - pipeline: [{'$vectorSearch': {'index': 'vector_index', 'limit': 60, 'numCandidates': 600, 'queryVector': [-0.08256451040506363, -0.07900252193212509, -0.05290786176919937, 0.021982736885547638, 0.046406690031290054, 0.027769701555371284, -0.02768588438630104, -0.020102187991142273, -0.05407266318798065, -0.061684805899858475, -0.03940979018807411, -0.029285598546266556, -0.1118478998541832, -0.03136416897177696, -0.04099257290363312, -0.07897000014781952, -0.02522769570350647, 0.043702732771635056, -0.030820483341813087, -0.041595760732889175, 0.10552595555782318, 0.0023172772489488125, 0.08983399718999863, 0.10865391790866852, -0.06146957352757454, 0.04154617711901665, 0.015428234823048115, 0.016568025574088097, 0.013623313046991825, -0.06059451401233673, 0.08428270369768143, 0.009563339874148369, -0.002620439976453781, 0.016997039318084717, -0.07201018929481506, -0.010901586152613163, -0.030768705531954765, -0.04398634657263756, -0.026716720312833786, -0.019298473373055458, 0.029043301939964294, -0.03137688338756561, -0.0516120120882988, -0.033414166420698166, 0.05385608226060867, -0.025596346706151962, -0.02077491395175457, -0.0634346529841423, 0.03223349153995514, 0.02784794755280018, -0.06079091876745224, -0.012161108665168285, -0.0933445394039154, -0.018985357135534286, -0.022000310942530632, 0.08059032261371613, 0.03905639797449112, 0.008981743827462196, -0.04856802150607109, -0.0195226538926363, -0.016003113240003586, -0.10165907442569733, -0.004733760375529528, 0.030122995376586914, -0.038355227559804916, 0.03839924931526184, -0.028533125296235085, 0.01822500303387642, 0.0707336813211441, -0.02592848241329193, 0.02241864986717701, 0.022557010874152184, 0.007257631979882717, 0.03511698544025421, 0.008497730828821659, 0.06233576685190201, 0.06869452446699142, 0.06520985811948776, -0.018009020015597343, 0.008016299456357956, -0.09440284222364426, -0.06914988905191422, -0.016991959884762764, -0.004849573597311974, 0.015289856120944023, -0.05368100106716156, -0.07648778706789017, 0.04355047643184662, -0.013986689038574696, 0.03536888584494591, 0.03178128972649574, 0.03904074802994728, 0.027542345225811005, 0.021311746910214424, -0.08981165289878845, 0.050620175898075104, 0.006543598137795925, 0.07310184836387634, -0.033499374985694885, -0.01851048693060875, -0.07171830534934998, -0.07049573212862015, -0.02946554869413376, 0.04081925004720688, -0.015752671286463737, -0.05440584942698479, -0.00638421019539237, -0.027693038806319237, -0.015809008851647377, -0.0794110968708992, 0.08307767659425735, -0.010127314366400242, 0.031197702512145042, -0.0325561985373497, 0.028586456552147865, 0.05326930806040764, -0.04397851228713989, -0.06359461694955826, 0.003676487598568201, 0.06998850405216217, -0.02999182790517807, 0.03461074084043503, 0.05651488155126572, -0.05784572660923004, 0.02231559529900551, -0.07732831686735153, -0.029416916891932487, 1.8518434945716996e-33, 0.0358523465692997, -0.002374001545831561, 0.009263500571250916, -0.05580880120396614, 0.030508413910865784, -0.037797845900058746, 0.01508091390132904, 0.02779262885451317, -0.04756521061062813, 0.010429342277348042, -0.005697719287127256, 0.03368696570396423, -0.014907917007803917, -0.02615354210138321, -0.05337945744395256, -0.08737822622060776, 0.04612358659505844, 0.016435381025075912, -0.03597096726298332, -0.06492944061756134, 0.11139646172523499, 
-0.04470240697264671, 0.013333962298929691, 0.06944458186626434, 0.04924115538597107, 0.021988168358802795, -0.0033458129037171602, -0.021327221766114235, 0.04618706554174423, 0.09092214703559875, -0.009819227270781994, 0.03574197739362717, -0.02589249238371849, 0.015359507873654366, 0.01923568733036518, 0.009884021244943142, -0.0687863752245903, 0.008688706904649734, 0.0003024878678843379, 0.006991893518716097, -0.07505182921886444, -0.045765507966279984, 0.005778071004897356, 0.0200499240309, -0.07049272209405899, -0.06168036535382271, 0.044801026582717896, 0.026470575481653214, 0.01803005486726761, 0.04355733096599579, 0.034672655165195465, -0.08011800795793533, 0.03965161740779877, 0.08112046867609024, 0.07237163931131363, 0.07554267346858978, -0.0966770201921463, 0.05703232064843178, 0.007653184700757265, 0.09404793381690979, 0.02874479629099369, 0.032439567148685455, -0.006544401869177818, 0.0747322142124176, -0.06779398024082184, -0.03769124671816826, 0.018574388697743416, -0.0027497054543346167, 0.05186106637120247, 0.045869190245866776, 0.052037931978702545, 0.00877095852047205, 0.00956355594098568, 0.06010708585381508, 0.07063814997673035, -0.05281956121325493, 0.11385682970285416, 0.0014734964352101088, -0.13000114262104034, 0.04160114377737045, 0.002756801201030612, -0.03354136645793915, -0.012316903099417686, -0.04667062684893608, -0.021649040281772614, 0.009122663177549839, 0.07305404543876648, 0.050488732755184174, 0.0037498027086257935, 0.06742933392524719, -0.09808871150016785, -0.02533995360136032, 0.07752660661935806, -0.008930775336921215, -0.020734407007694244, -8.718873943854186e-34, 0.030775681138038635, -0.04046367108821869, -0.07485030591487885, 0.06837300956249237, 0.03777360916137695, 0.03171695023775101, 0.038366734981536865, -0.009698187932372093, -0.06721752882003784, 0.03483430668711662, -0.03264770656824112, -0.004821446258574724, 0.017873667180538177, -0.01217806525528431, -0.06693356484174728, -0.042935941368341446, 0.07182027399539948, -0.023592444136738777, 0.010779321193695068, 0.03270953893661499, -0.03838556632399559, -0.010096886195242405, -0.058566078543663025, -0.06304068863391876, -0.013382021337747574, -0.011351224966347218, -0.08517401665449142, 0.007304960861802101, -0.04197632893919945, -0.008837309665977955, 0.000581165833864361, 0.009765408001840115, -0.02323746308684349, -0.07040572166442871, -0.0630621388554573, -0.01030951738357544, 0.07319610565900803, -0.002567168092355132, -0.00982675701379776, 0.08009836822748184, 0.06278694421052933, -0.053986601531505585, -0.13036444783210754, -0.05632428079843521, -0.012127791531383991, -0.00034488266101107, -0.05524465814232826, -0.019998280331492424, -0.041557829827070236, 0.07457990199327469, -0.004864905495196581, 0.0744631364941597, -0.038698967546224594, 0.11076352000236511, 0.08321533352136612, -0.1319902539253235, 0.05189663544297218, -0.08637715131044388, -0.047119464725255966, 0.0712425485253334, 0.038989413529634476, -0.06715074181556702, 0.0770900622010231, -0.016237575560808182, 0.16853967308998108, -0.003975923638790846, 0.11307050287723541, 0.07726389169692993, -0.028748558834195137, 0.04492560029029846, 0.0768602192401886, 0.0852692499756813, 0.021246735006570816, 0.11719376593828201, 0.0029091970063745975, -0.011192459613084793, -0.09389575570821762, 0.021549541503190994, -0.0055024465546011925, 0.032183919101953506, 0.0651387944817543, -0.0652405172586441, 0.03021097555756569, 0.1095665693283081, -0.02563057281076908, 0.05070950835943222, 0.09074468910694122, 0.08164751529693604, 
0.039858028292655945, -0.045717816799879074, -0.01968374475836754, -0.01942502148449421, 0.020252034068107605, 0.028495490550994873, -0.014108758419752121, -2.6071681702433125e-08, -0.004948799964040518, -0.03374723717570305, -0.006966953631490469, 0.04770921543240547, 0.060589514672756195, 0.039017271250486374, -0.06870992481708527, 0.04758283868432045, -0.04153140261769295, -0.009761914610862732, 0.05678777024149895, -0.024886248633265495, 0.08310353755950928, 0.04019981995224953, 0.04347654804587364, -0.016476230695843697, 0.02281028777360916, 0.044384729117155075, 0.012391419149935246, 0.03150279074907303, 0.03414358198642731, 0.023670021444559097, -0.035867370665073395, 0.00584121560677886, 0.03878429904580116, -0.03416749835014343, 0.0317315049469471, 0.014832393266260624, 0.06329585611820221, -0.07007385790348053, -0.11312873661518097, -0.0667077898979187, 0.031542230397462845, 0.03318323940038681, -0.05146196484565735, -0.04369741305708885, 0.030556850135326385, 0.05148332566022873, -0.09324397146701813, 0.08804989606142044, -0.05473781377077103, 0.02356131188571453, -0.0072563826106488705, -0.013308629393577576, 0.022258494049310684, 0.047823697328567505, -0.014027439057826996, -0.018331162631511688, -0.02744504064321518, 0.027262693271040916, -0.03694259002804756, 0.04492212459445, 0.04835069552063942, 0.09086570143699646, -0.0022586847189813852, -0.03940355032682419, -0.005774456076323986, -0.06551025062799454, -0.04700932279229164, -0.00200175354257226, -0.039275478571653366, -0.04998438432812691, -0.08698498457670212, 0.015872927382588387], 'path': 'embedding'}}, {'$set': {'score': {'$meta': 'vectorSearchScore'}}}]\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "query_text How can I use FLAML to perform a classification task and use spark to do parallel training. 
Train 30 seconds and force cancel jobs if time limit is reached.\n", + "pipeline: [{'$vectorSearch': {'index': 'vector_index', 'limit': 60, 'numCandidates': 600, 'queryVector': [-0.08256451040506363, -0.07900252193212509, -0.05290786176919937, 0.021982736885547638, 0.046406690031290054, 0.027769701555371284, -0.02768588438630104, -0.020102187991142273, -0.05407266318798065, -0.061684805899858475, -0.03940979018807411, -0.029285598546266556, -0.1118478998541832, -0.03136416897177696, -0.04099257290363312, -0.07897000014781952, -0.02522769570350647, 0.043702732771635056, -0.030820483341813087, -0.041595760732889175, 0.10552595555782318, 0.0023172772489488125, 0.08983399718999863, 0.10865391790866852, -0.06146957352757454, 0.04154617711901665, 0.015428234823048115, 0.016568025574088097, 0.013623313046991825, -0.06059451401233673, 0.08428270369768143, 0.009563339874148369, -0.002620439976453781, 0.016997039318084717, -0.07201018929481506, -0.010901586152613163, -0.030768705531954765, -0.04398634657263756, -0.026716720312833786, -0.019298473373055458, 0.029043301939964294, -0.03137688338756561, -0.0516120120882988, -0.033414166420698166, 0.05385608226060867, -0.025596346706151962, -0.02077491395175457, -0.0634346529841423, 0.03223349153995514, 0.02784794755280018, -0.06079091876745224, -0.012161108665168285, -0.0933445394039154, -0.018985357135534286, -0.022000310942530632, 0.08059032261371613, 0.03905639797449112, 0.008981743827462196, -0.04856802150607109, -0.0195226538926363, -0.016003113240003586, -0.10165907442569733, -0.004733760375529528, 0.030122995376586914, -0.038355227559804916, 0.03839924931526184, -0.028533125296235085, 0.01822500303387642, 0.0707336813211441, -0.02592848241329193, 0.02241864986717701, 0.022557010874152184, 0.007257631979882717, 0.03511698544025421, 0.008497730828821659, 0.06233576685190201, 0.06869452446699142, 0.06520985811948776, -0.018009020015597343, 0.008016299456357956, -0.09440284222364426, -0.06914988905191422, -0.016991959884762764, -0.004849573597311974, 0.015289856120944023, -0.05368100106716156, -0.07648778706789017, 0.04355047643184662, -0.013986689038574696, 0.03536888584494591, 0.03178128972649574, 0.03904074802994728, 0.027542345225811005, 0.021311746910214424, -0.08981165289878845, 0.050620175898075104, 0.006543598137795925, 0.07310184836387634, -0.033499374985694885, -0.01851048693060875, -0.07171830534934998, -0.07049573212862015, -0.02946554869413376, 0.04081925004720688, -0.015752671286463737, -0.05440584942698479, -0.00638421019539237, -0.027693038806319237, -0.015809008851647377, -0.0794110968708992, 0.08307767659425735, -0.010127314366400242, 0.031197702512145042, -0.0325561985373497, 0.028586456552147865, 0.05326930806040764, -0.04397851228713989, -0.06359461694955826, 0.003676487598568201, 0.06998850405216217, -0.02999182790517807, 0.03461074084043503, 0.05651488155126572, -0.05784572660923004, 0.02231559529900551, -0.07732831686735153, -0.029416916891932487, 1.8518434945716996e-33, 0.0358523465692997, -0.002374001545831561, 0.009263500571250916, -0.05580880120396614, 0.030508413910865784, -0.037797845900058746, 0.01508091390132904, 0.02779262885451317, -0.04756521061062813, 0.010429342277348042, -0.005697719287127256, 0.03368696570396423, -0.014907917007803917, -0.02615354210138321, -0.05337945744395256, -0.08737822622060776, 0.04612358659505844, 0.016435381025075912, -0.03597096726298332, -0.06492944061756134, 0.11139646172523499, -0.04470240697264671, 0.013333962298929691, 0.06944458186626434, 0.04924115538597107, 0.021988168358802795, 
-0.0033458129037171602, -0.021327221766114235, 0.04618706554174423, 0.09092214703559875, -0.009819227270781994, 0.03574197739362717, -0.02589249238371849, 0.015359507873654366, 0.01923568733036518, 0.009884021244943142, -0.0687863752245903, 0.008688706904649734, 0.0003024878678843379, 0.006991893518716097, -0.07505182921886444, -0.045765507966279984, 0.005778071004897356, 0.0200499240309, -0.07049272209405899, -0.06168036535382271, 0.044801026582717896, 0.026470575481653214, 0.01803005486726761, 0.04355733096599579, 0.034672655165195465, -0.08011800795793533, 0.03965161740779877, 0.08112046867609024, 0.07237163931131363, 0.07554267346858978, -0.0966770201921463, 0.05703232064843178, 0.007653184700757265, 0.09404793381690979, 0.02874479629099369, 0.032439567148685455, -0.006544401869177818, 0.0747322142124176, -0.06779398024082184, -0.03769124671816826, 0.018574388697743416, -0.0027497054543346167, 0.05186106637120247, 0.045869190245866776, 0.052037931978702545, 0.00877095852047205, 0.00956355594098568, 0.06010708585381508, 0.07063814997673035, -0.05281956121325493, 0.11385682970285416, 0.0014734964352101088, -0.13000114262104034, 0.04160114377737045, 0.002756801201030612, -0.03354136645793915, -0.012316903099417686, -0.04667062684893608, -0.021649040281772614, 0.009122663177549839, 0.07305404543876648, 0.050488732755184174, 0.0037498027086257935, 0.06742933392524719, -0.09808871150016785, -0.02533995360136032, 0.07752660661935806, -0.008930775336921215, -0.020734407007694244, -8.718873943854186e-34, 0.030775681138038635, -0.04046367108821869, -0.07485030591487885, 0.06837300956249237, 0.03777360916137695, 0.03171695023775101, 0.038366734981536865, -0.009698187932372093, -0.06721752882003784, 0.03483430668711662, -0.03264770656824112, -0.004821446258574724, 0.017873667180538177, -0.01217806525528431, -0.06693356484174728, -0.042935941368341446, 0.07182027399539948, -0.023592444136738777, 0.010779321193695068, 0.03270953893661499, -0.03838556632399559, -0.010096886195242405, -0.058566078543663025, -0.06304068863391876, -0.013382021337747574, -0.011351224966347218, -0.08517401665449142, 0.007304960861802101, -0.04197632893919945, -0.008837309665977955, 0.000581165833864361, 0.009765408001840115, -0.02323746308684349, -0.07040572166442871, -0.0630621388554573, -0.01030951738357544, 0.07319610565900803, -0.002567168092355132, -0.00982675701379776, 0.08009836822748184, 0.06278694421052933, -0.053986601531505585, -0.13036444783210754, -0.05632428079843521, -0.012127791531383991, -0.00034488266101107, -0.05524465814232826, -0.019998280331492424, -0.041557829827070236, 0.07457990199327469, -0.004864905495196581, 0.0744631364941597, -0.038698967546224594, 0.11076352000236511, 0.08321533352136612, -0.1319902539253235, 0.05189663544297218, -0.08637715131044388, -0.047119464725255966, 0.0712425485253334, 0.038989413529634476, -0.06715074181556702, 0.0770900622010231, -0.016237575560808182, 0.16853967308998108, -0.003975923638790846, 0.11307050287723541, 0.07726389169692993, -0.028748558834195137, 0.04492560029029846, 0.0768602192401886, 0.0852692499756813, 0.021246735006570816, 0.11719376593828201, 0.0029091970063745975, -0.011192459613084793, -0.09389575570821762, 0.021549541503190994, -0.0055024465546011925, 0.032183919101953506, 0.0651387944817543, -0.0652405172586441, 0.03021097555756569, 0.1095665693283081, -0.02563057281076908, 0.05070950835943222, 0.09074468910694122, 0.08164751529693604, 0.039858028292655945, -0.045717816799879074, -0.01968374475836754, -0.01942502148449421, 
0.020252034068107605, 0.028495490550994873, -0.014108758419752121, -2.6071681702433125e-08, -0.004948799964040518, -0.03374723717570305, -0.006966953631490469, 0.04770921543240547, 0.060589514672756195, 0.039017271250486374, -0.06870992481708527, 0.04758283868432045, -0.04153140261769295, -0.009761914610862732, 0.05678777024149895, -0.024886248633265495, 0.08310353755950928, 0.04019981995224953, 0.04347654804587364, -0.016476230695843697, 0.02281028777360916, 0.044384729117155075, 0.012391419149935246, 0.03150279074907303, 0.03414358198642731, 0.023670021444559097, -0.035867370665073395, 0.00584121560677886, 0.03878429904580116, -0.03416749835014343, 0.0317315049469471, 0.014832393266260624, 0.06329585611820221, -0.07007385790348053, -0.11312873661518097, -0.0667077898979187, 0.031542230397462845, 0.03318323940038681, -0.05146196484565735, -0.04369741305708885, 0.030556850135326385, 0.05148332566022873, -0.09324397146701813, 0.08804989606142044, -0.05473781377077103, 0.02356131188571453, -0.0072563826106488705, -0.013308629393577576, 0.022258494049310684, 0.047823697328567505, -0.014027439057826996, -0.018331162631511688, -0.02744504064321518, 0.027262693271040916, -0.03694259002804756, 0.04492212459445, 0.04835069552063942, 0.09086570143699646, -0.0022586847189813852, -0.03940355032682419, -0.005774456076323986, -0.06551025062799454, -0.04700932279229164, -0.00200175354257226, -0.039275478571653366, -0.04998438432812691, -0.08698498457670212, 0.015872927382588387], 'path': 'embedding'}}, {'$set': {'score': {'$meta': 'vectorSearchScore'}}}]\n", + "VectorDB returns doc_ids: [[ObjectId('6682a624b9758026836ffab5'), ObjectId('6682a624b9758026836ffad5')]]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-07-01 08:51:22,212 - autogen.agentchat.contrib.vectordb.mongodb - INFO - Using index: [{'id': '6682a6042cf0e270602c0fe1', 'name': 'vector_index', 'type': 'vectorSearch', 'status': 'READY', 'queryable': True, 'latestDefinitionVersion': {'version': 0, 'createdAt': datetime.datetime(2024, 7, 1, 12, 50, 12, 109000)}, 'latestDefinition': {'fields': [{'type': 'vector', 'numDimensions': 384, 'path': 'embedding', 'similarity': 'cosine'}]}, 'statusDetail': [{'hostname': 'shared-shard-00-search-6xag8e', 'status': 'READY', 'queryable': True, 'mainIndex': {'status': 'READY', 'queryable': True, 'definitionVersion': {'version': 0, 'createdAt': datetime.datetime(2024, 7, 1, 12, 50, 12)}, 'definition': {'fields': [{'type': 'vector', 'path': 'embedding', 'numDimensions': 384, 'similarity': 'cosine'}]}}}, {'hostname': 'shared-shard-00-search-onamml', 'status': 'READY', 'queryable': True, 'mainIndex': {'status': 'READY', 'queryable': True, 'definitionVersion': {'version': 0, 'createdAt': datetime.datetime(2024, 7, 1, 12, 50, 12)}, 'definition': {'fields': [{'type': 'vector', 'path': 'embedding', 'numDimensions': 384, 'similarity': 'cosine'}]}}}]}]\u001b[0m\n", + "2024-07-01 08:51:37,216 - autogen.agentchat.contrib.vectordb.mongodb - INFO - Query: How can I use FLAML to perform a classification task and use spark to do parallel training. 
Train 30 seconds and force cancel jobs if time limit is reached.\u001b[0m\n", + "2024-07-01 08:51:37,286 - autogen.agentchat.contrib.vectordb.mongodb - INFO - pipeline: [{'$vectorSearch': {'index': 'vector_index', 'limit': 100, 'numCandidates': 1000, 'queryVector': [-0.08256451040506363, -0.07900252193212509, -0.05290786176919937, 0.021982736885547638, 0.046406690031290054, 0.027769701555371284, -0.02768588438630104, -0.020102187991142273, -0.05407266318798065, -0.061684805899858475, -0.03940979018807411, -0.029285598546266556, -0.1118478998541832, -0.03136416897177696, -0.04099257290363312, -0.07897000014781952, -0.02522769570350647, 0.043702732771635056, -0.030820483341813087, -0.041595760732889175, 0.10552595555782318, 0.0023172772489488125, 0.08983399718999863, 0.10865391790866852, -0.06146957352757454, 0.04154617711901665, 0.015428234823048115, 0.016568025574088097, 0.013623313046991825, -0.06059451401233673, 0.08428270369768143, 0.009563339874148369, -0.002620439976453781, 0.016997039318084717, -0.07201018929481506, -0.010901586152613163, -0.030768705531954765, -0.04398634657263756, -0.026716720312833786, -0.019298473373055458, 0.029043301939964294, -0.03137688338756561, -0.0516120120882988, -0.033414166420698166, 0.05385608226060867, -0.025596346706151962, -0.02077491395175457, -0.0634346529841423, 0.03223349153995514, 0.02784794755280018, -0.06079091876745224, -0.012161108665168285, -0.0933445394039154, -0.018985357135534286, -0.022000310942530632, 0.08059032261371613, 0.03905639797449112, 0.008981743827462196, -0.04856802150607109, -0.0195226538926363, -0.016003113240003586, -0.10165907442569733, -0.004733760375529528, 0.030122995376586914, -0.038355227559804916, 0.03839924931526184, -0.028533125296235085, 0.01822500303387642, 0.0707336813211441, -0.02592848241329193, 0.02241864986717701, 0.022557010874152184, 0.007257631979882717, 0.03511698544025421, 0.008497730828821659, 0.06233576685190201, 0.06869452446699142, 0.06520985811948776, -0.018009020015597343, 0.008016299456357956, -0.09440284222364426, -0.06914988905191422, -0.016991959884762764, -0.004849573597311974, 0.015289856120944023, -0.05368100106716156, -0.07648778706789017, 0.04355047643184662, -0.013986689038574696, 0.03536888584494591, 0.03178128972649574, 0.03904074802994728, 0.027542345225811005, 0.021311746910214424, -0.08981165289878845, 0.050620175898075104, 0.006543598137795925, 0.07310184836387634, -0.033499374985694885, -0.01851048693060875, -0.07171830534934998, -0.07049573212862015, -0.02946554869413376, 0.04081925004720688, -0.015752671286463737, -0.05440584942698479, -0.00638421019539237, -0.027693038806319237, -0.015809008851647377, -0.0794110968708992, 0.08307767659425735, -0.010127314366400242, 0.031197702512145042, -0.0325561985373497, 0.028586456552147865, 0.05326930806040764, -0.04397851228713989, -0.06359461694955826, 0.003676487598568201, 0.06998850405216217, -0.02999182790517807, 0.03461074084043503, 0.05651488155126572, -0.05784572660923004, 0.02231559529900551, -0.07732831686735153, -0.029416916891932487, 1.8518434945716996e-33, 0.0358523465692997, -0.002374001545831561, 0.009263500571250916, -0.05580880120396614, 0.030508413910865784, -0.037797845900058746, 0.01508091390132904, 0.02779262885451317, -0.04756521061062813, 0.010429342277348042, -0.005697719287127256, 0.03368696570396423, -0.014907917007803917, -0.02615354210138321, -0.05337945744395256, -0.08737822622060776, 0.04612358659505844, 0.016435381025075912, -0.03597096726298332, -0.06492944061756134, 0.11139646172523499, 
-0.04470240697264671, 0.013333962298929691, 0.06944458186626434, 0.04924115538597107, 0.021988168358802795, -0.0033458129037171602, -0.021327221766114235, 0.04618706554174423, 0.09092214703559875, -0.009819227270781994, 0.03574197739362717, -0.02589249238371849, 0.015359507873654366, 0.01923568733036518, 0.009884021244943142, -0.0687863752245903, 0.008688706904649734, 0.0003024878678843379, 0.006991893518716097, -0.07505182921886444, -0.045765507966279984, 0.005778071004897356, 0.0200499240309, -0.07049272209405899, -0.06168036535382271, 0.044801026582717896, 0.026470575481653214, 0.01803005486726761, 0.04355733096599579, 0.034672655165195465, -0.08011800795793533, 0.03965161740779877, 0.08112046867609024, 0.07237163931131363, 0.07554267346858978, -0.0966770201921463, 0.05703232064843178, 0.007653184700757265, 0.09404793381690979, 0.02874479629099369, 0.032439567148685455, -0.006544401869177818, 0.0747322142124176, -0.06779398024082184, -0.03769124671816826, 0.018574388697743416, -0.0027497054543346167, 0.05186106637120247, 0.045869190245866776, 0.052037931978702545, 0.00877095852047205, 0.00956355594098568, 0.06010708585381508, 0.07063814997673035, -0.05281956121325493, 0.11385682970285416, 0.0014734964352101088, -0.13000114262104034, 0.04160114377737045, 0.002756801201030612, -0.03354136645793915, -0.012316903099417686, -0.04667062684893608, -0.021649040281772614, 0.009122663177549839, 0.07305404543876648, 0.050488732755184174, 0.0037498027086257935, 0.06742933392524719, -0.09808871150016785, -0.02533995360136032, 0.07752660661935806, -0.008930775336921215, -0.020734407007694244, -8.718873943854186e-34, 0.030775681138038635, -0.04046367108821869, -0.07485030591487885, 0.06837300956249237, 0.03777360916137695, 0.03171695023775101, 0.038366734981536865, -0.009698187932372093, -0.06721752882003784, 0.03483430668711662, -0.03264770656824112, -0.004821446258574724, 0.017873667180538177, -0.01217806525528431, -0.06693356484174728, -0.042935941368341446, 0.07182027399539948, -0.023592444136738777, 0.010779321193695068, 0.03270953893661499, -0.03838556632399559, -0.010096886195242405, -0.058566078543663025, -0.06304068863391876, -0.013382021337747574, -0.011351224966347218, -0.08517401665449142, 0.007304960861802101, -0.04197632893919945, -0.008837309665977955, 0.000581165833864361, 0.009765408001840115, -0.02323746308684349, -0.07040572166442871, -0.0630621388554573, -0.01030951738357544, 0.07319610565900803, -0.002567168092355132, -0.00982675701379776, 0.08009836822748184, 0.06278694421052933, -0.053986601531505585, -0.13036444783210754, -0.05632428079843521, -0.012127791531383991, -0.00034488266101107, -0.05524465814232826, -0.019998280331492424, -0.041557829827070236, 0.07457990199327469, -0.004864905495196581, 0.0744631364941597, -0.038698967546224594, 0.11076352000236511, 0.08321533352136612, -0.1319902539253235, 0.05189663544297218, -0.08637715131044388, -0.047119464725255966, 0.0712425485253334, 0.038989413529634476, -0.06715074181556702, 0.0770900622010231, -0.016237575560808182, 0.16853967308998108, -0.003975923638790846, 0.11307050287723541, 0.07726389169692993, -0.028748558834195137, 0.04492560029029846, 0.0768602192401886, 0.0852692499756813, 0.021246735006570816, 0.11719376593828201, 0.0029091970063745975, -0.011192459613084793, -0.09389575570821762, 0.021549541503190994, -0.0055024465546011925, 0.032183919101953506, 0.0651387944817543, -0.0652405172586441, 0.03021097555756569, 0.1095665693283081, -0.02563057281076908, 0.05070950835943222, 0.09074468910694122, 0.08164751529693604, 
0.039858028292655945, -0.045717816799879074, -0.01968374475836754, -0.01942502148449421, 0.020252034068107605, 0.028495490550994873, -0.014108758419752121, -2.6071681702433125e-08, -0.004948799964040518, -0.03374723717570305, -0.006966953631490469, 0.04770921543240547, 0.060589514672756195, 0.039017271250486374, -0.06870992481708527, 0.04758283868432045, -0.04153140261769295, -0.009761914610862732, 0.05678777024149895, -0.024886248633265495, 0.08310353755950928, 0.04019981995224953, 0.04347654804587364, -0.016476230695843697, 0.02281028777360916, 0.044384729117155075, 0.012391419149935246, 0.03150279074907303, 0.03414358198642731, 0.023670021444559097, -0.035867370665073395, 0.00584121560677886, 0.03878429904580116, -0.03416749835014343, 0.0317315049469471, 0.014832393266260624, 0.06329585611820221, -0.07007385790348053, -0.11312873661518097, -0.0667077898979187, 0.031542230397462845, 0.03318323940038681, -0.05146196484565735, -0.04369741305708885, 0.030556850135326385, 0.05148332566022873, -0.09324397146701813, 0.08804989606142044, -0.05473781377077103, 0.02356131188571453, -0.0072563826106488705, -0.013308629393577576, 0.022258494049310684, 0.047823697328567505, -0.014027439057826996, -0.018331162631511688, -0.02744504064321518, 0.027262693271040916, -0.03694259002804756, 0.04492212459445, 0.04835069552063942, 0.09086570143699646, -0.0022586847189813852, -0.03940355032682419, -0.005774456076323986, -0.06551025062799454, -0.04700932279229164, -0.00200175354257226, -0.039275478571653366, -0.04998438432812691, -0.08698498457670212, 0.015872927382588387], 'path': 'embedding'}}, {'$set': {'score': {'$meta': 'vectorSearchScore'}}}]\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "query_text How can I use FLAML to perform a classification task and use spark to do parallel training. 
Train 30 seconds and force cancel jobs if time limit is reached.\n", + "pipeline: [{'$vectorSearch': {'index': 'vector_index', 'limit': 100, 'numCandidates': 1000, 'queryVector': [-0.08256451040506363, -0.07900252193212509, -0.05290786176919937, 0.021982736885547638, 0.046406690031290054, 0.027769701555371284, -0.02768588438630104, -0.020102187991142273, -0.05407266318798065, -0.061684805899858475, -0.03940979018807411, -0.029285598546266556, -0.1118478998541832, -0.03136416897177696, -0.04099257290363312, -0.07897000014781952, -0.02522769570350647, 0.043702732771635056, -0.030820483341813087, -0.041595760732889175, 0.10552595555782318, 0.0023172772489488125, 0.08983399718999863, 0.10865391790866852, -0.06146957352757454, 0.04154617711901665, 0.015428234823048115, 0.016568025574088097, 0.013623313046991825, -0.06059451401233673, 0.08428270369768143, 0.009563339874148369, -0.002620439976453781, 0.016997039318084717, -0.07201018929481506, -0.010901586152613163, -0.030768705531954765, -0.04398634657263756, -0.026716720312833786, -0.019298473373055458, 0.029043301939964294, -0.03137688338756561, -0.0516120120882988, -0.033414166420698166, 0.05385608226060867, -0.025596346706151962, -0.02077491395175457, -0.0634346529841423, 0.03223349153995514, 0.02784794755280018, -0.06079091876745224, -0.012161108665168285, -0.0933445394039154, -0.018985357135534286, -0.022000310942530632, 0.08059032261371613, 0.03905639797449112, 0.008981743827462196, -0.04856802150607109, -0.0195226538926363, -0.016003113240003586, -0.10165907442569733, -0.004733760375529528, 0.030122995376586914, -0.038355227559804916, 0.03839924931526184, -0.028533125296235085, 0.01822500303387642, 0.0707336813211441, -0.02592848241329193, 0.02241864986717701, 0.022557010874152184, 0.007257631979882717, 0.03511698544025421, 0.008497730828821659, 0.06233576685190201, 0.06869452446699142, 0.06520985811948776, -0.018009020015597343, 0.008016299456357956, -0.09440284222364426, -0.06914988905191422, -0.016991959884762764, -0.004849573597311974, 0.015289856120944023, -0.05368100106716156, -0.07648778706789017, 0.04355047643184662, -0.013986689038574696, 0.03536888584494591, 0.03178128972649574, 0.03904074802994728, 0.027542345225811005, 0.021311746910214424, -0.08981165289878845, 0.050620175898075104, 0.006543598137795925, 0.07310184836387634, -0.033499374985694885, -0.01851048693060875, -0.07171830534934998, -0.07049573212862015, -0.02946554869413376, 0.04081925004720688, -0.015752671286463737, -0.05440584942698479, -0.00638421019539237, -0.027693038806319237, -0.015809008851647377, -0.0794110968708992, 0.08307767659425735, -0.010127314366400242, 0.031197702512145042, -0.0325561985373497, 0.028586456552147865, 0.05326930806040764, -0.04397851228713989, -0.06359461694955826, 0.003676487598568201, 0.06998850405216217, -0.02999182790517807, 0.03461074084043503, 0.05651488155126572, -0.05784572660923004, 0.02231559529900551, -0.07732831686735153, -0.029416916891932487, 1.8518434945716996e-33, 0.0358523465692997, -0.002374001545831561, 0.009263500571250916, -0.05580880120396614, 0.030508413910865784, -0.037797845900058746, 0.01508091390132904, 0.02779262885451317, -0.04756521061062813, 0.010429342277348042, -0.005697719287127256, 0.03368696570396423, -0.014907917007803917, -0.02615354210138321, -0.05337945744395256, -0.08737822622060776, 0.04612358659505844, 0.016435381025075912, -0.03597096726298332, -0.06492944061756134, 0.11139646172523499, -0.04470240697264671, 0.013333962298929691, 0.06944458186626434, 0.04924115538597107, 
0.021988168358802795, -0.0033458129037171602, -0.021327221766114235, 0.04618706554174423, 0.09092214703559875, -0.009819227270781994, 0.03574197739362717, -0.02589249238371849, 0.015359507873654366, 0.01923568733036518, 0.009884021244943142, -0.0687863752245903, 0.008688706904649734, 0.0003024878678843379, 0.006991893518716097, -0.07505182921886444, -0.045765507966279984, 0.005778071004897356, 0.0200499240309, -0.07049272209405899, -0.06168036535382271, 0.044801026582717896, 0.026470575481653214, 0.01803005486726761, 0.04355733096599579, 0.034672655165195465, -0.08011800795793533, 0.03965161740779877, 0.08112046867609024, 0.07237163931131363, 0.07554267346858978, -0.0966770201921463, 0.05703232064843178, 0.007653184700757265, 0.09404793381690979, 0.02874479629099369, 0.032439567148685455, -0.006544401869177818, 0.0747322142124176, -0.06779398024082184, -0.03769124671816826, 0.018574388697743416, -0.0027497054543346167, 0.05186106637120247, 0.045869190245866776, 0.052037931978702545, 0.00877095852047205, 0.00956355594098568, 0.06010708585381508, 0.07063814997673035, -0.05281956121325493, 0.11385682970285416, 0.0014734964352101088, -0.13000114262104034, 0.04160114377737045, 0.002756801201030612, -0.03354136645793915, -0.012316903099417686, -0.04667062684893608, -0.021649040281772614, 0.009122663177549839, 0.07305404543876648, 0.050488732755184174, 0.0037498027086257935, 0.06742933392524719, -0.09808871150016785, -0.02533995360136032, 0.07752660661935806, -0.008930775336921215, -0.020734407007694244, -8.718873943854186e-34, 0.030775681138038635, -0.04046367108821869, -0.07485030591487885, 0.06837300956249237, 0.03777360916137695, 0.03171695023775101, 0.038366734981536865, -0.009698187932372093, -0.06721752882003784, 0.03483430668711662, -0.03264770656824112, -0.004821446258574724, 0.017873667180538177, -0.01217806525528431, -0.06693356484174728, -0.042935941368341446, 0.07182027399539948, -0.023592444136738777, 0.010779321193695068, 0.03270953893661499, -0.03838556632399559, -0.010096886195242405, -0.058566078543663025, -0.06304068863391876, -0.013382021337747574, -0.011351224966347218, -0.08517401665449142, 0.007304960861802101, -0.04197632893919945, -0.008837309665977955, 0.000581165833864361, 0.009765408001840115, -0.02323746308684349, -0.07040572166442871, -0.0630621388554573, -0.01030951738357544, 0.07319610565900803, -0.002567168092355132, -0.00982675701379776, 0.08009836822748184, 0.06278694421052933, -0.053986601531505585, -0.13036444783210754, -0.05632428079843521, -0.012127791531383991, -0.00034488266101107, -0.05524465814232826, -0.019998280331492424, -0.041557829827070236, 0.07457990199327469, -0.004864905495196581, 0.0744631364941597, -0.038698967546224594, 0.11076352000236511, 0.08321533352136612, -0.1319902539253235, 0.05189663544297218, -0.08637715131044388, -0.047119464725255966, 0.0712425485253334, 0.038989413529634476, -0.06715074181556702, 0.0770900622010231, -0.016237575560808182, 0.16853967308998108, -0.003975923638790846, 0.11307050287723541, 0.07726389169692993, -0.028748558834195137, 0.04492560029029846, 0.0768602192401886, 0.0852692499756813, 0.021246735006570816, 0.11719376593828201, 0.0029091970063745975, -0.011192459613084793, -0.09389575570821762, 0.021549541503190994, -0.0055024465546011925, 0.032183919101953506, 0.0651387944817543, -0.0652405172586441, 0.03021097555756569, 0.1095665693283081, -0.02563057281076908, 0.05070950835943222, 0.09074468910694122, 0.08164751529693604, 0.039858028292655945, -0.045717816799879074, -0.01968374475836754, 
-0.01942502148449421, 0.020252034068107605, 0.028495490550994873, -0.014108758419752121, -2.6071681702433125e-08, -0.004948799964040518, -0.03374723717570305, -0.006966953631490469, 0.04770921543240547, 0.060589514672756195, 0.039017271250486374, -0.06870992481708527, 0.04758283868432045, -0.04153140261769295, -0.009761914610862732, 0.05678777024149895, -0.024886248633265495, 0.08310353755950928, 0.04019981995224953, 0.04347654804587364, -0.016476230695843697, 0.02281028777360916, 0.044384729117155075, 0.012391419149935246, 0.03150279074907303, 0.03414358198642731, 0.023670021444559097, -0.035867370665073395, 0.00584121560677886, 0.03878429904580116, -0.03416749835014343, 0.0317315049469471, 0.014832393266260624, 0.06329585611820221, -0.07007385790348053, -0.11312873661518097, -0.0667077898979187, 0.031542230397462845, 0.03318323940038681, -0.05146196484565735, -0.04369741305708885, 0.030556850135326385, 0.05148332566022873, -0.09324397146701813, 0.08804989606142044, -0.05473781377077103, 0.02356131188571453, -0.0072563826106488705, -0.013308629393577576, 0.022258494049310684, 0.047823697328567505, -0.014027439057826996, -0.018331162631511688, -0.02744504064321518, 0.027262693271040916, -0.03694259002804756, 0.04492212459445, 0.04835069552063942, 0.09086570143699646, -0.0022586847189813852, -0.03940355032682419, -0.005774456076323986, -0.06551025062799454, -0.04700932279229164, -0.00200175354257226, -0.039275478571653366, -0.04998438432812691, -0.08698498457670212, 0.015872927382588387], 'path': 'embedding'}}, {'$set': {'score': {'$meta': 'vectorSearchScore'}}}]\n", + "VectorDB returns doc_ids: [[ObjectId('6682a624b9758026836ffab5'), ObjectId('6682a624b9758026836ffad5')]]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-07-01 08:51:37,885 - autogen.agentchat.contrib.vectordb.mongodb - INFO - Using index: [{'id': '6682a6042cf0e270602c0fe1', 'name': 'vector_index', 'type': 'vectorSearch', 'status': 'READY', 'queryable': True, 'latestDefinitionVersion': {'version': 0, 'createdAt': datetime.datetime(2024, 7, 1, 12, 50, 12, 109000)}, 'latestDefinition': {'fields': [{'type': 'vector', 'numDimensions': 384, 'path': 'embedding', 'similarity': 'cosine'}]}, 'statusDetail': [{'hostname': 'shared-shard-00-search-6xag8e', 'status': 'READY', 'queryable': True, 'mainIndex': {'status': 'READY', 'queryable': True, 'definitionVersion': {'version': 0, 'createdAt': datetime.datetime(2024, 7, 1, 12, 50, 12)}, 'definition': {'fields': [{'type': 'vector', 'path': 'embedding', 'numDimensions': 384, 'similarity': 'cosine'}]}}}, {'hostname': 'shared-shard-00-search-onamml', 'status': 'READY', 'queryable': True, 'mainIndex': {'status': 'READY', 'queryable': True, 'definitionVersion': {'version': 0, 'createdAt': datetime.datetime(2024, 7, 1, 12, 50, 12)}, 'definition': {'fields': [{'type': 'vector', 'path': 'embedding', 'numDimensions': 384, 'similarity': 'cosine'}]}}}]}]\u001b[0m\n", + "2024-07-01 08:51:52,889 - autogen.agentchat.contrib.vectordb.mongodb - INFO - Query: How can I use FLAML to perform a classification task and use spark to do parallel training. 
Train 30 seconds and force cancel jobs if time limit is reached.\u001b[0m\n", + "2024-07-01 08:51:52,975 - autogen.agentchat.contrib.vectordb.mongodb - INFO - pipeline: [{'$vectorSearch': {'index': 'vector_index', 'limit': 140, 'numCandidates': 1400, 'queryVector': [-0.08256451040506363, -0.07900252193212509, -0.05290786176919937, 0.021982736885547638, 0.046406690031290054, 0.027769701555371284, -0.02768588438630104, -0.020102187991142273, -0.05407266318798065, -0.061684805899858475, -0.03940979018807411, -0.029285598546266556, -0.1118478998541832, -0.03136416897177696, -0.04099257290363312, -0.07897000014781952, -0.02522769570350647, 0.043702732771635056, -0.030820483341813087, -0.041595760732889175, 0.10552595555782318, 0.0023172772489488125, 0.08983399718999863, 0.10865391790866852, -0.06146957352757454, 0.04154617711901665, 0.015428234823048115, 0.016568025574088097, 0.013623313046991825, -0.06059451401233673, 0.08428270369768143, 0.009563339874148369, -0.002620439976453781, 0.016997039318084717, -0.07201018929481506, -0.010901586152613163, -0.030768705531954765, -0.04398634657263756, -0.026716720312833786, -0.019298473373055458, 0.029043301939964294, -0.03137688338756561, -0.0516120120882988, -0.033414166420698166, 0.05385608226060867, -0.025596346706151962, -0.02077491395175457, -0.0634346529841423, 0.03223349153995514, 0.02784794755280018, -0.06079091876745224, -0.012161108665168285, -0.0933445394039154, -0.018985357135534286, -0.022000310942530632, 0.08059032261371613, 0.03905639797449112, 0.008981743827462196, -0.04856802150607109, -0.0195226538926363, -0.016003113240003586, -0.10165907442569733, -0.004733760375529528, 0.030122995376586914, -0.038355227559804916, 0.03839924931526184, -0.028533125296235085, 0.01822500303387642, 0.0707336813211441, -0.02592848241329193, 0.02241864986717701, 0.022557010874152184, 0.007257631979882717, 0.03511698544025421, 0.008497730828821659, 0.06233576685190201, 0.06869452446699142, 0.06520985811948776, -0.018009020015597343, 0.008016299456357956, -0.09440284222364426, -0.06914988905191422, -0.016991959884762764, -0.004849573597311974, 0.015289856120944023, -0.05368100106716156, -0.07648778706789017, 0.04355047643184662, -0.013986689038574696, 0.03536888584494591, 0.03178128972649574, 0.03904074802994728, 0.027542345225811005, 0.021311746910214424, -0.08981165289878845, 0.050620175898075104, 0.006543598137795925, 0.07310184836387634, -0.033499374985694885, -0.01851048693060875, -0.07171830534934998, -0.07049573212862015, -0.02946554869413376, 0.04081925004720688, -0.015752671286463737, -0.05440584942698479, -0.00638421019539237, -0.027693038806319237, -0.015809008851647377, -0.0794110968708992, 0.08307767659425735, -0.010127314366400242, 0.031197702512145042, -0.0325561985373497, 0.028586456552147865, 0.05326930806040764, -0.04397851228713989, -0.06359461694955826, 0.003676487598568201, 0.06998850405216217, -0.02999182790517807, 0.03461074084043503, 0.05651488155126572, -0.05784572660923004, 0.02231559529900551, -0.07732831686735153, -0.029416916891932487, 1.8518434945716996e-33, 0.0358523465692997, -0.002374001545831561, 0.009263500571250916, -0.05580880120396614, 0.030508413910865784, -0.037797845900058746, 0.01508091390132904, 0.02779262885451317, -0.04756521061062813, 0.010429342277348042, -0.005697719287127256, 0.03368696570396423, -0.014907917007803917, -0.02615354210138321, -0.05337945744395256, -0.08737822622060776, 0.04612358659505844, 0.016435381025075912, -0.03597096726298332, -0.06492944061756134, 0.11139646172523499, 
-0.04470240697264671, 0.013333962298929691, 0.06944458186626434, 0.04924115538597107, 0.021988168358802795, -0.0033458129037171602, -0.021327221766114235, 0.04618706554174423, 0.09092214703559875, -0.009819227270781994, 0.03574197739362717, -0.02589249238371849, 0.015359507873654366, 0.01923568733036518, 0.009884021244943142, -0.0687863752245903, 0.008688706904649734, 0.0003024878678843379, 0.006991893518716097, -0.07505182921886444, -0.045765507966279984, 0.005778071004897356, 0.0200499240309, -0.07049272209405899, -0.06168036535382271, 0.044801026582717896, 0.026470575481653214, 0.01803005486726761, 0.04355733096599579, 0.034672655165195465, -0.08011800795793533, 0.03965161740779877, 0.08112046867609024, 0.07237163931131363, 0.07554267346858978, -0.0966770201921463, 0.05703232064843178, 0.007653184700757265, 0.09404793381690979, 0.02874479629099369, 0.032439567148685455, -0.006544401869177818, 0.0747322142124176, -0.06779398024082184, -0.03769124671816826, 0.018574388697743416, -0.0027497054543346167, 0.05186106637120247, 0.045869190245866776, 0.052037931978702545, 0.00877095852047205, 0.00956355594098568, 0.06010708585381508, 0.07063814997673035, -0.05281956121325493, 0.11385682970285416, 0.0014734964352101088, -0.13000114262104034, 0.04160114377737045, 0.002756801201030612, -0.03354136645793915, -0.012316903099417686, -0.04667062684893608, -0.021649040281772614, 0.009122663177549839, 0.07305404543876648, 0.050488732755184174, 0.0037498027086257935, 0.06742933392524719, -0.09808871150016785, -0.02533995360136032, 0.07752660661935806, -0.008930775336921215, -0.020734407007694244, -8.718873943854186e-34, 0.030775681138038635, -0.04046367108821869, -0.07485030591487885, 0.06837300956249237, 0.03777360916137695, 0.03171695023775101, 0.038366734981536865, -0.009698187932372093, -0.06721752882003784, 0.03483430668711662, -0.03264770656824112, -0.004821446258574724, 0.017873667180538177, -0.01217806525528431, -0.06693356484174728, -0.042935941368341446, 0.07182027399539948, -0.023592444136738777, 0.010779321193695068, 0.03270953893661499, -0.03838556632399559, -0.010096886195242405, -0.058566078543663025, -0.06304068863391876, -0.013382021337747574, -0.011351224966347218, -0.08517401665449142, 0.007304960861802101, -0.04197632893919945, -0.008837309665977955, 0.000581165833864361, 0.009765408001840115, -0.02323746308684349, -0.07040572166442871, -0.0630621388554573, -0.01030951738357544, 0.07319610565900803, -0.002567168092355132, -0.00982675701379776, 0.08009836822748184, 0.06278694421052933, -0.053986601531505585, -0.13036444783210754, -0.05632428079843521, -0.012127791531383991, -0.00034488266101107, -0.05524465814232826, -0.019998280331492424, -0.041557829827070236, 0.07457990199327469, -0.004864905495196581, 0.0744631364941597, -0.038698967546224594, 0.11076352000236511, 0.08321533352136612, -0.1319902539253235, 0.05189663544297218, -0.08637715131044388, -0.047119464725255966, 0.0712425485253334, 0.038989413529634476, -0.06715074181556702, 0.0770900622010231, -0.016237575560808182, 0.16853967308998108, -0.003975923638790846, 0.11307050287723541, 0.07726389169692993, -0.028748558834195137, 0.04492560029029846, 0.0768602192401886, 0.0852692499756813, 0.021246735006570816, 0.11719376593828201, 0.0029091970063745975, -0.011192459613084793, -0.09389575570821762, 0.021549541503190994, -0.0055024465546011925, 0.032183919101953506, 0.0651387944817543, -0.0652405172586441, 0.03021097555756569, 0.1095665693283081, -0.02563057281076908, 0.05070950835943222, 0.09074468910694122, 0.08164751529693604, 
..., 0.015872927382588387], 'path': 'embedding'}}, {'$set': {'score': {'$meta': 'vectorSearchScore'}}}]\u001b[0m\n"
+      ]
+     },
+     {
+      "name": "stdout",
+      "output_type": "stream",
+      "text": [
+       "query_text How can I use FLAML to perform a classification task and use spark to do parallel training. Train 30 seconds and force cancel jobs if time limit is reached.\n",
+       "pipeline: [{'$vectorSearch': {'index': 'vector_index', 'limit': 140, 'numCandidates': 1400, 'queryVector': [-0.08256451040506363, -0.07900252193212509, ... 384-dim query embedding truncated ...], 'path': 'embedding'}}, {'$set': {'score': {'$meta': 'vectorSearchScore'}}}]\n",
+       "VectorDB returns doc_ids: [[ObjectId('6682a624b9758026836ffab5'), ObjectId('6682a624b9758026836ffad5')]]\n"
+      ]
+     },
+     {
+      "name": "stderr",
+      "output_type": "stream",
+      "text": [
+       "2024-07-01 08:51:53,494 - autogen.agentchat.contrib.vectordb.mongodb - INFO - Using index: [{'id': '6682a6042cf0e270602c0fe1', 'name': 'vector_index', 'type': 'vectorSearch', 'status': 'READY', 'queryable': True, 'latestDefinitionVersion': {'version': 0, 'createdAt': datetime.datetime(2024, 7, 1, 12, 50, 12, 109000)}, 'latestDefinition': {'fields': [{'type': 'vector', 'numDimensions': 384, 'path': 'embedding', 'similarity': 'cosine'}]}, 'statusDetail': [... per-host status truncated; both search hosts report READY and queryable ...]}]\u001b[0m\n",
+       "2024-07-01 08:52:08,496 - autogen.agentchat.contrib.vectordb.mongodb - INFO - Query: How can I use FLAML to perform a classification task and use spark to do parallel training. Train 30 seconds and force cancel jobs if time limit is reached.\u001b[0m\n",
+       "2024-07-01 08:52:08,542 - autogen.agentchat.contrib.vectordb.mongodb - INFO - pipeline: [{'$vectorSearch': {'index': 'vector_index', 'limit': 180, 'numCandidates': 1800, 'queryVector': [-0.08256451040506363, -0.07900252193212509, ... 384-dim query embedding truncated ...], 'path': 'embedding'}}, {'$set': {'score': {'$meta': 'vectorSearchScore'}}}]\u001b[0m\n"
+      ]
+     },
+     {
+      "name": "stdout",
+      "output_type": "stream",
+      "text": [
+       "query_text How can I use FLAML to perform a classification task and use spark to do parallel training. Train 30 seconds and force cancel jobs if time limit is reached.\n",
+       "pipeline: [{'$vectorSearch': {'index': 'vector_index', 'limit': 180, 'numCandidates': 1800, 'queryVector': [-0.08256451040506363, -0.07900252193212509, ... 384-dim query embedding truncated ...], 'path': 'embedding'}}, {'$set': {'score': {'$meta': 'vectorSearchScore'}}}]\n",
+       "VectorDB returns doc_ids: [[ObjectId('6682a624b9758026836ffab5'), ObjectId('6682a624b9758026836ffad5')]]\n",
+       "\u001b[32mNo more context, will terminate.\u001b[0m\n",
+       "\u001b[33mragproxyagent\u001b[0m (to assistant):\n",
+       "\n",
+       "TERMINATE\n",
+       "\n",
+       "--------------------------------------------------------------------------------\n"
+      ]
+     }
+   ],
+   "source": [
+    "# reset the assistant. Always reset the assistant before starting a new conversation.\n",
+    "assistant.reset()\n",
+    "\n",
+    "# Given a problem, we use the ragproxyagent to generate a prompt to be sent to the assistant as the initial message.\n",
+    "# The assistant receives the message and generates a response, which is sent back to the ragproxyagent for processing.\n",
+    "# Without a human in the loop, RetrieveChat terminates the conversation once no code block is detected in the response.\n",
+    "# With a human in the loop, the conversation continues until the user says \"exit\".\n",
+    "code_problem = \"How can I use FLAML to perform a classification task and use spark to do parallel training. Train 30 seconds and force cancel jobs if time limit is reached.\"\n",
+    "chat_result = ragproxyagent.initiate_chat(\n",
+    "    assistant, message=ragproxyagent.message_generator, problem=code_problem, search_string=\"spark\"\n",
+    ") # search_string is used as an extra filter for the embedding search; in this case, we only want to search documents that contain \"spark\"."
+ ] + } + ], + "metadata": { + "front_matter": { + "description": "Explore the use of AutoGen's RetrieveChat for tasks like code generation from docstrings, answering complex questions with human feedback, and exploiting features like Update Context, custom prompts, and few-shot learning.", + "tags": [ + "RAG" + ] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.10" + }, + "skip_test": "Requires interactive usage" + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/setup.py b/setup.py index 9117ed45cea..520dcc9ce78 100644 --- a/setup.py +++ b/setup.py @@ -72,6 +72,8 @@ "mathchat": ["sympy", "pydantic==1.10.9", "wolframalpha"], "retrievechat": retrieve_chat, "retrievechat-pgvector": retrieve_chat_pgvector, + + "retrievechat-mongodb": [*retrieve_chat, "pymongo>=4.0.0"], "retrievechat-qdrant": [*retrieve_chat, "qdrant_client", "fastembed>=0.3.1"], "autobuild": ["chromadb", "sentence-transformers", "huggingface-hub", "pysqlite3"], "teachable": ["chromadb"], diff --git a/test/agentchat/contrib/vectordb/test_mongodb.py b/test/agentchat/contrib/vectordb/test_mongodb.py new file mode 100644 index 00000000000..14d73ff33c7 --- /dev/null +++ b/test/agentchat/contrib/vectordb/test_mongodb.py @@ -0,0 +1,345 @@ +import logging +import os +from time import monotonic, sleep +from typing import List + +import pytest + +from autogen.agentchat.contrib.vectordb.base import Document + +try: + import pymongo + import sentence_transformers + + from autogen.agentchat.contrib.vectordb.mongodb import MongoDBAtlasVectorDB +except ImportError: + # To display warning in pyproject.toml [tool.pytest.ini_options] set log_cli = true + logger = logging.getLogger(__name__) + logger.warning(f"skipping {__name__}. It requires one to pip install pymongo or the extra [retrievechat-mongodb]") + pytest.skip(allow_module_level=True) + +from pymongo.collection import Collection + +logger = logging.getLogger(__name__) + +MONGODB_URI = os.environ.get("MONGODB_URI", "mongodb://localhost:64684/?directConnection=true") +MONGODB_DATABASE = os.environ.get("DATABASE", "autogen_test_db") +MONGODB_COLLECTION = os.environ.get("MONGODB_COLLECTION", "autogen_test_vectorstore") +MONGODB_INDEX = os.environ.get("MONGODB_INDEX", "vector_index") + +RETRIES = 10 +DELAY = 2 +TIMEOUT = 20.0 + + +def _wait_for_predicate(predicate, err, timeout=TIMEOUT, interval=DELAY): + """Generic to block until the predicate returns true + + Args: + predicate (Callable[, bool]): A function that returns a boolean value + err (str): Error message to raise if nothing occurs + timeout (float, optional): Length of time to wait for predicate. Defaults to TIMEOUT. + interval (float, optional): Interval to check predicate. Defaults to DELAY. 
+
+    Raises:
+        TimeoutError: If `predicate` does not return True within `timeout` seconds.
+    """
+    start = monotonic()
+    while not predicate():
+        if monotonic() - start > timeout:
+            raise TimeoutError(err)
+        sleep(interval)
+
+
+def _delete_collections(database):
+    """Delete all collections within the database
+
+    Args:
+        database (pymongo.Database): MongoDB Database Abstraction
+    """
+    for collection_name in database.list_collection_names():
+        database[collection_name].drop()
+    _wait_for_predicate(lambda: not database.list_collection_names(), "Not all collections deleted")
+
+
+@pytest.fixture
+def db():
+    """VectorDB setup and teardown, including collections and search indexes"""
+    vectorstore = MongoDBAtlasVectorDB(
+        connection_string=MONGODB_URI,
+        database_name=MONGODB_DATABASE,
+        wait_until_ready=True,
+        overwrite=True,
+    )
+    yield vectorstore
+    _delete_collections(vectorstore.db)
+
+
+@pytest.fixture
+def example_documents() -> List[Document]:
+    """Note mix of integers and strings as ids"""
+    return [
+        Document(id=1, content="Dogs are tough.", metadata={"a": 1}),
+        Document(id=2, content="Cats have fluff.", metadata={"b": 1}),
+        Document(id="1", content="What is a sandwich?", metadata={"c": 1}),
+        Document(id="2", content="A sandwich makes a great lunch.", metadata={"d": 1, "e": 2}),
+    ]
+
+
+@pytest.fixture
+def db_with_indexed_clxn():
+    """VectorDB with a collection created immediately"""
+    vectorstore = MongoDBAtlasVectorDB(
+        connection_string=MONGODB_URI,
+        database_name=MONGODB_DATABASE,
+        wait_until_ready=True,
+        collection_name=MONGODB_COLLECTION,
+        overwrite=True,
+    )
+    yield vectorstore, vectorstore.db[MONGODB_COLLECTION]
+    _delete_collections(vectorstore.db)
+
+
+def test_create_collection(db):
+    """
+    def create_collection(collection_name: str,
+                          overwrite: bool = False) -> Collection
+    Create a collection in the vector database.
+    - Case 1. if the collection does not exist, create the collection.
+    - Case 2. the collection exists and overwrite is True: overwrite the collection.
+    - Case 3. the collection exists and overwrite is False: return the existing collection.
+ """ + collection_name = "test_collection" + + collection_case_1 = db.create_collection( + collection_name=collection_name, + ) + assert collection_case_1.name == collection_name + + collection_case_2 = db.create_collection( + collection_name=collection_name, + overwrite=True, + ) + assert collection_case_2.name == collection_name + + collection_case_3 = db.create_collection( + collection_name=collection_name, + ) + assert collection_case_3.name == collection_name + + +def test_get_collection(db): + collection_name = MONGODB_COLLECTION + + with pytest.raises(ValueError): + db.get_collection() + + collection_created = db.create_collection(collection_name) + assert isinstance(collection_created, Collection) + assert collection_created.name == collection_name + + collection_got = db.get_collection(collection_name) + assert collection_got.name == collection_created.name + assert collection_got.name == db.active_collection.name + + +def test_delete_collection(db): + assert MONGODB_COLLECTION not in db.list_collections() + collection = db.create_collection(MONGODB_COLLECTION) + assert MONGODB_COLLECTION in db.list_collections() + db.delete_collection(collection.name) + assert MONGODB_COLLECTION not in db.list_collections() + + +def test_insert_docs(db, example_documents): + # Test that there's an active collection + with pytest.raises(ValueError) as exc: + db.insert_docs(example_documents) + assert "No collection is specified" in str(exc.value) + + # Test upsert + db.insert_docs(example_documents, MONGODB_COLLECTION, upsert=True) + + # Create a collection + db.delete_collection(MONGODB_COLLECTION) + collection = db.create_collection(MONGODB_COLLECTION) + + # Insert example documents + db.insert_docs(example_documents, collection_name=MONGODB_COLLECTION) + found = list(collection.find({})) + assert len(found) == len(example_documents) + # Check that documents have correct fields, including "_id" and "embedding" but not "id" + assert all([set(doc.keys()) == {"_id", "content", "metadata", "embedding"} for doc in found]) + # Check ids + assert {doc["_id"] for doc in found} == {1, "1", 2, "2"} + # Check embedding lengths + assert len(found[0]["embedding"]) == 384 + + +def test_update_docs(db_with_indexed_clxn, example_documents): + db, collection = db_with_indexed_clxn + # Use update_docs to insert new documents + db.update_docs(example_documents, MONGODB_COLLECTION, upsert=True) + # Test that no changes were made to example_documents + assert set(example_documents[0].keys()) == {"id", "content", "metadata"} + assert collection.count_documents({}) == len(example_documents) + found = list(collection.find({})) + # Check that documents have correct fields, including "_id" and "embedding" but not "id" + assert all([set(doc.keys()) == {"_id", "content", "metadata", "embedding"} for doc in found]) + # Check ids + assert {doc["_id"] for doc in found} == {1, "1", 2, "2"} + + # Update an *existing* Document + updated_doc = Document(id=1, content="Cats are tough.", metadata={"a": 10}) + db.update_docs([updated_doc], MONGODB_COLLECTION) + assert collection.find_one({"_id": 1})["content"] == "Cats are tough." + + # Upsert a *new* Document + new_id = 3 + new_doc = Document(id=new_id, content="Cats are tough.") + db.update_docs([new_doc], MONGODB_COLLECTION, upsert=True) + assert collection.find_one({"_id": new_id})["content"] == "Cats are tough." + + # Attempting to use update to insert a new doc + # *without* setting upsert set to True + # is a no-op in MongoDB. 
# TODO Confirm behaviour and autogen's preference. + new_id = 4 + new_doc = Document(id=new_id, content="That is NOT a sandwich?") + db.update_docs([new_doc], MONGODB_COLLECTION) + assert collection.find_one({"_id": new_id}) is None + + +def test_delete_docs(db_with_indexed_clxn, example_documents): + db, clxn = db_with_indexed_clxn + # Insert example documents + db.insert_docs(example_documents, collection_name=MONGODB_COLLECTION) + # Delete the 1s + db.delete_docs(ids=[1, "1"], collection_name=MONGODB_COLLECTION) + # Confirm just the 2s remain + assert {2, "2"} == {doc["_id"] for doc in clxn.find({})} + + +def test_get_docs_by_ids(db_with_indexed_clxn, example_documents): + db, _ = db_with_indexed_clxn + # Insert example documents + db.insert_docs(example_documents, collection_name=MONGODB_COLLECTION) + + # Test without setting "include" kwarg + docs = db.get_docs_by_ids(ids=[2, "2"], collection_name=MONGODB_COLLECTION) + assert len(docs) == 2 + assert all([doc["id"] in [2, "2"] for doc in docs]) + assert set(docs[0].keys()) == {"id", "content", "metadata"} + + # Test with include + docs = db.get_docs_by_ids(ids=[2], include=["content"], collection_name=MONGODB_COLLECTION) + assert len(docs) == 1 + assert set(docs[0].keys()) == {"id", "content"} + + # Test with empty ids list + docs = db.get_docs_by_ids(ids=[], include=["content"], collection_name=MONGODB_COLLECTION) + assert len(docs) == 0 + + +def test_retrieve_docs_empty(db_with_indexed_clxn): + db, _ = db_with_indexed_clxn + assert db.retrieve_docs(queries=["Cats"], collection_name=MONGODB_COLLECTION, n_results=2) == [] + + +def test_retrieve_docs_populated_db_empty_query(db_with_indexed_clxn, example_documents): + db, _ = db_with_indexed_clxn + db.insert_docs(example_documents, collection_name=MONGODB_COLLECTION) + # Empty list of queries returns empty list of results + results = db.retrieve_docs(queries=[], collection_name=MONGODB_COLLECTION, n_results=2) + assert results == [] + + +def test_retrieve_docs(db_with_indexed_clxn, example_documents): + """Begin testing Atlas Vector Search + NOTE: Indexing may take some time, so we must be patient on the first query. + We have the wait_until_ready flag to ensure index is created and ready + Immediately adding documents and then querying is only standard for testing + """ + db, _ = db_with_indexed_clxn + # Insert example documents + db.insert_docs(example_documents, collection_name=MONGODB_COLLECTION) + + n_results = 2 # Number of closest docs to return + + def results_ready(): + results = db.retrieve_docs(queries=["Cats"], collection_name=MONGODB_COLLECTION, n_results=n_results) + return len(results[0]) == n_results + + _wait_for_predicate(results_ready, f"Failed to retrieve docs after waiting {TIMEOUT} seconds after each.") + + results = db.retrieve_docs(queries=["Cats"], collection_name=MONGODB_COLLECTION, n_results=n_results) + assert {doc[0]["id"] for doc in results[0]} == {1, 2} + + +def test_retrieve_docs_with_embedding(db_with_indexed_clxn, example_documents): + """Begin testing Atlas Vector Search + NOTE: Indexing may take some time, so we must be patient on the first query. 
+ We have the wait_until_ready flag to ensure index is created and ready + Immediately adding documents and then querying is only standard for testing + """ + db, _ = db_with_indexed_clxn + # Insert example documents + db.insert_docs(example_documents, collection_name=MONGODB_COLLECTION) + + n_results = 2 # Number of closest docs to return + + def results_ready(): + results = db.retrieve_docs(queries=["Cats"], collection_name=MONGODB_COLLECTION, n_results=n_results) + return len(results[0]) == n_results + + _wait_for_predicate(results_ready, f"Failed to retrieve docs after waiting {TIMEOUT} seconds after each.") + + results = db.retrieve_docs( + queries=["Cats"], collection_name=MONGODB_COLLECTION, n_results=n_results, include_embedding=True + ) + assert {doc[0]["id"] for doc in results[0]} == {1, 2} + assert all(["embedding" in doc[0] for doc in results[0]]) + + +def test_retrieve_docs_multiple_queries(db_with_indexed_clxn, example_documents): + db, _ = db_with_indexed_clxn + # Insert example documents + db.insert_docs(example_documents, collection_name=MONGODB_COLLECTION) + n_results = 2 # Number of closest docs to return + + queries = ["Some good pets", "What kind of Sandwich?"] + + def results_ready(): + results = db.retrieve_docs(queries=queries, collection_name=MONGODB_COLLECTION, n_results=n_results) + return all([len(res) == n_results for res in results]) + + _wait_for_predicate(results_ready, f"Failed to retrieve docs after waiting {TIMEOUT} seconds after each.") + + results = db.retrieve_docs(queries=queries, collection_name=MONGODB_COLLECTION, n_results=2) + + assert len(results) == len(queries) + assert all([len(res) == n_results for res in results]) + assert {doc[0]["id"] for doc in results[0]} == {1, 2} + assert {doc[0]["id"] for doc in results[1]} == {"1", "2"} + + +def test_retrieve_docs_with_threshold(db_with_indexed_clxn, example_documents): + db, _ = db_with_indexed_clxn + # Insert example documents + db.insert_docs(example_documents, collection_name=MONGODB_COLLECTION) + + n_results = 2 # Number of closest docs to return + queries = ["Cats"] + + def results_ready(): + results = db.retrieve_docs(queries=queries, collection_name=MONGODB_COLLECTION, n_results=n_results) + return len(results[0]) == n_results + + _wait_for_predicate(results_ready, f"Failed to retrieve docs after waiting {TIMEOUT} seconds after each.") + + # Distance Threshold of .3 means that the score must be .7 or greater + # only one result should be that value + results = db.retrieve_docs( + queries=queries, collection_name=MONGODB_COLLECTION, n_results=n_results, distance_threshold=0.3 + ) + assert len(results[0]) == 1 + assert all([doc[1] >= 0.7 for doc in results[0]])
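For reference, the distance_threshold assertion above relies on retrieve_docs returning, for each query, a list of (document, score) pairs in which the score is the similarity reported by $vectorSearch; a distance threshold of 0.3 therefore keeps only hits scoring at least 0.7. Below is a minimal sketch of that filtering step under this assumption; the helper name filter_by_distance_threshold and the sample data are illustrative only and are not part of this PR.

from typing import Any, Dict, List, Tuple

# One list of (document, score) hits per query, mirroring the QueryResults shape used in these tests.
QueryResultsLike = List[List[Tuple[Dict[str, Any], float]]]


def filter_by_distance_threshold(results: QueryResultsLike, distance_threshold: float) -> QueryResultsLike:
    # Convert the distance threshold into a minimum similarity score:
    # distance_threshold=0.3 keeps only hits whose score is >= 0.7.
    min_score = 1.0 - distance_threshold
    return [[(doc, score) for doc, score in hits if score >= min_score] for hits in results]


if __name__ == "__main__":
    raw: QueryResultsLike = [
        [({"id": 2, "content": "Cats have fluff."}, 0.87), ({"id": 1, "content": "Dogs are tough."}, 0.55)]
    ]
    filtered = filter_by_distance_threshold(raw, distance_threshold=0.3)
    # Only the high-similarity hit survives the 0.3 distance threshold.
    assert len(filtered[0]) == 1 and filtered[0][0][0]["id"] == 2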