Skip to content

Commit

Permalink
Fix docstring of get_or_create (microsoft#583)
Browse files: browse the repository at this point in the history
* Fix docstring of get_or_create

* Improve docstring
  • Loading branch information
thinkall committed Nov 12, 2023
1 parent 255bf22 commit 8863e03
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 10 deletions.
12 changes: 5 additions & 7 deletions autogen/agentchat/contrib/retrieve_user_proxy_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,8 @@ def __init__(
- customized_answer_prefix (Optional, str): the customized answer prefix for the retrieve chat. Default is "".
If not "" and the customized_answer_prefix is not in the answer, `Update Context` will be triggered.
- update_context (Optional, bool): if False, will not apply `Update Context` for interactive retrieval. Default is True.
- - get_or_create (Optional, bool): if True, will create/recreate a collection for the retrieve chat.
- This is the same as that used in chromadb. Default is False. Will be set to False if docs_path is None.
+ - get_or_create (Optional, bool): if True, will create/return a collection for the retrieve chat. This is the same as that used in chromadb.
+ Default is False. Will raise ValueError if the collection already exists and get_or_create is False. Will be set to True if docs_path is None.
- custom_token_count_function(Optional, Callable): a custom function to count the number of tokens in a string.
The function should take (text:str, model:str) as input and return the token_count(int). the retrieve_config["model"] will be passed in the function.
Default is autogen.token_count_utils.count_token that uses tiktoken, which may not be accurate for non-OpenAI models.
Expand Down Expand Up @@ -178,9 +178,7 @@ def retrieve_docs(self, problem: str, n_results: int = 20, search_string: str =
self.customized_prompt = self._retrieve_config.get("customized_prompt", None)
self.customized_answer_prefix = self._retrieve_config.get("customized_answer_prefix", "").upper()
self.update_context = self._retrieve_config.get("update_context", True)
- self._get_or_create = (
- self._retrieve_config.get("get_or_create", False) if self._docs_path is not None else False
- )
+ self._get_or_create = self._retrieve_config.get("get_or_create", False) if self._docs_path is not None else True
self.custom_token_count_function = self._retrieve_config.get("custom_token_count_function", count_token)
self.custom_text_split_function = self._retrieve_config.get("custom_text_split_function", None)
self._context_max_tokens = self._max_tokens * 0.8
Expand Down Expand Up @@ -360,7 +358,7 @@ def retrieve_docs(self, problem: str, n_results: int = 20, search_string: str =
n_results (int): the number of results to be retrieved.
search_string (str): only docs containing this string will be retrieved.
"""
- if not self._collection or self._get_or_create:
+ if not self._collection or not self._get_or_create:
print("Trying to create collection.")
self._client = create_vector_db_from_dir(
dir_path=self._docs_path,
Expand All @@ -375,7 +373,7 @@ def retrieve_docs(self, problem: str, n_results: int = 20, search_string: str =
custom_text_split_function=self.custom_text_split_function,
)
self._collection = True
- self._get_or_create = False
+ self._get_or_create = True

results = query_vector_db(
query_texts=[problem],
Expand Down
2 changes: 1 addition & 1 deletion autogen/retrieve_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ def create_vector_db_from_dir(
db_path (Optional, str): the path to the chromadb. Default is "/tmp/chromadb.db".
collection_name (Optional, str): the name of the collection. Default is "all-my-documents".
get_or_create (Optional, bool): Whether to get or create the collection. Default is False. If True, the collection
- will be recreated if it already exists.
+ will be returned if it already exists. Will raise ValueError if the collection already exists and get_or_create is False.
chunk_mode (Optional, str): the chunk mode. Default is "multi_lines".
must_break_at_empty_line (Optional, bool): Whether to break at empty line. Default is True.
embedding_model (Optional, str): the embedding model to use. Default is "all-MiniLM-L6-v2". Will be ignored if
Expand Down
4 changes: 2 additions & 2 deletions notebook/agentchat_RetrieveChat.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@
" \"model\": config_list[0][\"model\"],\n",
" \"client\": chromadb.PersistentClient(path=\"/tmp/chromadb\"),\n",
" \"embedding_model\": \"all-mpnet-base-v2\",\n",
- " \"get_or_create\": False, # set to True if you want to recreate the collection\n",
+ " \"get_or_create\": True, # set to False if you don't want to reuse an existing collection, but you'll need to remove the collection manually\n",
" },\n",
")"
]
Expand Down Expand Up @@ -4172,7 +4172,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.10.12"
+ "version": "3.10.13"
}
},
"nbformat": 4,
Expand Down

0 comments on commit 8863e03

Please sign in to comment.