Future-House · jamesbraza · Nov 26, 2024 · Nov 15, 2024
diff --git a/src/aviary/env.py b/src/aviary/env.py
@@ -155,7 +155,7 @@ def filter_invalid_tool_calls(
     async def exec_tool_calls(
         self,
         message: ToolRequestMessage,
-        ordered: bool = False,
+        concurrency: bool = True,
         handle_tool_exc: bool = False,
         handle_invalid_tool_calls: bool = True,
         **function_kwargs,
@@ -165,8 +165,8 @@ async def exec_tool_calls(
 
         Args:
             message: ToolRequestMessage containing the tool calls.
-            ordered: Opt-in flag for forcing sequential execution (according to order
-                in the above message), otherwise tool calls are made concurrently.
+            concurrency: If True (default), execute tool calls concurrently; if False,
+                execute them sequentially, in the order given in the message.
             handle_tool_exc: Opt-in flag to suppress Exceptions and return them as a
                 ToolResponseMessage.
             handle_invalid_tool_calls: Flag to handle invalid tool calls by returning
@@ -249,7 +249,7 @@ async def _exec_tool_call(tool_call: ToolCall) -> ToolResponseMessage:
                 for tool_call in invalid_action.tool_calls
             ]
 
-        if not ordered:
+        if concurrency:
             valid_responses = await asyncio.gather(
                 *(_exec_tool_call(tc) for tc in valid_action.tool_calls)
             )
@@ -435,9 +435,11 @@ def __init__(
         self,
         task: str | None = None,
         end_immediately: bool = True,
+        concurrent_tool_calls: bool = True,
     ):
         self.end_immediately = end_immediately
         self.task = task
+        self.concurrent_tool_calls = concurrent_tool_calls
 
     @classmethod
     def from_task(cls, task: str) -> DummyEnv:
@@ -447,9 +449,7 @@ async def step(
         self, action: ToolRequestMessage
     ) -> tuple[Messages, float, bool, bool]:
         msgs: Messages = await self.exec_tool_calls(
-            action,
-            state=self.state,
-            ordered=True,  # for unit tests
+            action, state=self.state, concurrency=self.concurrent_tool_calls
         )
         self.state.messages.extend(msgs)
         return msgs, self.state.reward, self.state.done, False

diff --git a/tests/test_envs.py b/tests/test_envs.py
@@ -3,6 +3,7 @@
 import pathlib
 import re
 import tempfile
+import time
 from typing import ClassVar
 
 import litellm
@@ -193,9 +194,19 @@ async def test_multiple_calls(dummy_env: DummyEnv) -> None:
     assert done
 
 
+@pytest.mark.parametrize("concurrent_tool_calls", [False, True])
 @pytest.mark.asyncio
-async def test_invalid_tool_call(dummy_env: DummyEnv) -> None:
+async def test_invalid_tool_call(
+    dummy_env: DummyEnv, concurrent_tool_calls: bool
+) -> None:
+    def sleep(duration: float) -> None:
+        """Sleep for the input duration in seconds."""
+        time.sleep(duration)
+
+    sleep_tool = Tool.from_function(sleep, allow_empty_param_descriptions=True)
     _, tools = await dummy_env.reset()
+    dummy_env.tools.append(sleep_tool)
+    dummy_env.concurrent_tool_calls = concurrent_tool_calls
 
     obs, *_ = await dummy_env.step(
         ToolRequestMessage(tool_calls=[ToolCall.from_name("invalid_tool")])
@@ -206,12 +217,17 @@ async def test_invalid_tool_call(dummy_env: DummyEnv) -> None:
 
     # check that order is preserved even with invalid tool calls
     tool_calls = [
-        ToolCall.from_name(tools[0].info.name, story="Hello, how are you?"),
+        ToolCall.from_tool(sleep_tool, duration=0.1),
         ToolCall.from_name("invalid_tool"),
         ToolCall.from_name("invalid_tool"),
-        ToolCall.from_name(tools[0].info.name, story="Hello, how are you?"),
+        ToolCall.from_tool(sleep_tool, duration=0.1),
     ]
+    tic = time.perf_counter()
     obs, *_ = await dummy_env.step(ToolRequestMessage(tool_calls=tool_calls))
+    if concurrent_tool_calls:
+        assert time.perf_counter() - tic < 0.15
+    else:
+        assert time.perf_counter() - tic > 0.15
     assert obs
     for o, t in zip(obs, tool_calls, strict=True):
         assert o.tool_call_id == t.id