Skip to content

Commit

Permalink
optimize function. Instead of iterating over each character, guess at…
Browse files Browse the repository at this point in the history
… size and then iterate by token.
  • Loading branch information
dkirsche committed Feb 11, 2024
1 parent c221c99 commit 4e4cbb4
Showing 1 changed file with 9 additions and 7 deletions.
16 changes: 9 additions & 7 deletions autogen/agentchat/contrib/capabilities/context_handling.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def _transform_messages(self, messages: List[Dict]) -> List[Dict]:

def truncate_str_to_tokens(text: str, max_tokens: int) -> str:
"""
Truncate a string so that number of tokens in less than max_tokens.
Truncate a string so that the number of tokens is at most max_tokens.
Args:
text: String to process.
Expand All @@ -109,9 +109,11 @@ def truncate_str_to_tokens(text: str, max_tokens: int) -> str:
Returns:
Truncated string.
"""
truncated_string = ""
for char in text:
truncated_string += char
if token_count_utils.count_token(truncated_string) == max_tokens:
break
return truncated_string

tokens = text.split()
for token_count in range(max_tokens, 0, -1):
truncated_text_tokens = tokens[:token_count]
actual_token_count = token_count_utils.count_token(" ".join(truncated_text_tokens))
if actual_token_count <= max_tokens:
return " ".join(truncated_text_tokens)
return "" # Return empty string if no tokens are found

0 comments on commit 4e4cbb4

Please sign in to comment.