Future-House · whitead · Oct 22, 2024 · Oct 17, 2024 · Oct 17, 2024 · Oct 17, 2024
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -76,7 +76,7 @@ repos:
       - id: mypy
         additional_dependencies:
           - fastapi>=0.109 # Match pyproject.toml
-          - fhaviary>=0.6 # Match pyproject.toml
+          - fhaviary>=0.8 # Match pyproject.toml
           - httpx
           - litellm>=1.49.3 # Match pyproject.toml
           - numpy>=1.20 # Match pyproject.toml

diff --git a/ldp/agent/agent.py b/ldp/agent/agent.py
@@ -80,6 +80,10 @@ def named_ops(self) -> Iterable[tuple[str, Op]]:
         """Analogous to torch.nn.Module.named_parameters()."""
         return _find_ops(self)
 
+    @classmethod
+    def from_name(cls, name: str, **kwargs) -> Agent:
+        """Instantiate the agent registered under `name`, forwarding `kwargs` to its constructor."""
+        agent_cls = _AGENT_REGISTRY[name]
+        return agent_cls(**kwargs)
+
 
 class AgentConfig(BaseModel):
     """Configuration for specifying the type of agent i.e. the subclass of Agent above."""
@@ -96,7 +100,7 @@ class AgentConfig(BaseModel):
     )
 
     def construct_agent(self) -> Agent:
-        return _AGENT_REGISTRY[self.agent_type](**self.agent_kwargs)
+        """Build the agent named by `agent_type`, passing `agent_kwargs` as keyword arguments."""
+        return Agent.from_name(self.agent_type, **self.agent_kwargs)
 
     def __hash__(self) -> int:
+        """Hash the agent type concatenated with a key-sorted JSON dump of the agent kwargs."""
         return hash(self.agent_type + json.dumps(self.agent_kwargs, sort_keys=True))

diff --git a/ldp/alg/callbacks.py b/ldp/alg/callbacks.py
@@ -475,3 +475,53 @@ async def after_eval_loop(self) -> None:
         await super().after_eval_loop()  # Call the parent to compute means
         if self.eval_means:
             self._log_filtered_metrics(self.eval_means, step_type="Eval")
+
+
+class TerminalLoggingCallback(Callback):
+    """Callback that prints action, observation, and timing information to the terminal.
+
+    Timers are tracked per trajectory ID, so rollouts that run concurrently
+    do not overwrite each other's start times.
+    """
+
+    def __init__(self):
+        # Import now, rather than start running and die on the first print.
+        try:
+            from rich.pretty import pprint
+        except ImportError as exc:
+            raise ImportError(
+                f"rich is required for {type(self).__name__}. Please install it with `pip install rich`."
+            ) from exc
+        # Keep a handle on the printer so the hooks below do not re-import it.
+        self._pprint = pprint
+        # Start time of the in-flight transition, keyed by trajectory ID.
+        self._start_times: dict[str, float] = {}
+
+    async def before_transition(
+        self,
+        traj_id: str,
+        agent: Agent,
+        env: Environment,
+        agent_state: Any,
+        obs: list[Message],
+    ) -> None:
+        """Start the timer for this trajectory before each transition."""
+        # perf_counter is monotonic, so wall-clock adjustments cannot skew the result.
+        self._start_times[traj_id] = time.perf_counter()
+
+    async def after_agent_get_asv(
+        self,
+        traj_id: str,
+        action: OpResult[ToolRequestMessage],
+        next_agent_state: Any,
+        value: float,
+    ) -> None:
+        """Pretty-print the value of the action the agent selected."""
+        print("\nAction:")
+        self._pprint(action.value, expand_all=True)
+
+    async def after_env_step(
+        self, traj_id: str, obs: list[Message], reward: float, done: bool, trunc: bool
+    ) -> None:
+        """Pretty-print the observation and the time elapsed since the transition began."""
+        # Pop so the timer is reset; report 0.0 if no timer was started for this trajectory.
+        started = self._start_times.pop(traj_id, None)
+        elapsed_time = 0.0 if started is None else time.perf_counter() - started
+        print("\nObservation:")
+        self._pprint(obs, expand_all=True)
+        print(f"Elapsed time: {elapsed_time:.2f} seconds")
diff --git a/ldp/data_structures.py b/ldp/data_structures.py
@@ -126,6 +126,7 @@ def from_jsonl(cls, filename: str | os.PathLike) -> Self:
         return traj
 
     def compute_discounted_returns(self, discount: float = 1.0) -> list[float]:
+        """Compute the discounted returns for each step in the trajectory."""
         return discounted_returns(
             rewards=[step.reward for step in self.steps],
             terminated=[step.truncated for step in self.steps],

diff --git a/ldp/main.py b/ldp/main.py
@@ -0,0 +1,73 @@
+import argparse
+import asyncio
+import pickle
+from os import PathLike
+from pathlib import Path
+
+from aviary.env import Environment
+
+from ldp.agent import Agent
+from ldp.alg.callbacks import TerminalLoggingCallback
+from ldp.alg.rollout import RolloutManager
+
+
+def agent_factory(agent: Agent | str | PathLike) -> Agent:
+    """Resolve `agent` into an Agent instance.
+
+    Args:
+        agent: An Agent instance (returned unchanged), a name accepted by
+            `Agent.from_name`, or a path to a pickled agent.
+
+    Returns:
+        The resolved agent.
+
+    Raises:
+        ValueError: If `agent` is not a known agent name and no file exists at that path.
+    """
+    if isinstance(agent, Agent):
+        return agent
+
+    if isinstance(agent, str):
+        try:
+            return Agent.from_name(agent)
+        except KeyError:
+            # Not a known agent name: fall through and treat the string as a path.
+            pass
+
+    # Open directly instead of checking `exists()` first, which would leave a
+    # window for the file to disappear between the check and the open.
+    # SECURITY: unpickling can execute arbitrary code, only load trusted files.
+    try:
+        with Path(agent).open("rb") as f:
+            return pickle.load(f)  # noqa: S301
+    except FileNotFoundError:
+        raise ValueError(f"Could not resolve agent: {agent}") from None
+
+
+def environment_factory(environment: Environment | str, task: str) -> Environment:
+    """Resolve `environment` into an Environment instance.
+
+    Args:
+        environment: An Environment instance (returned unchanged) or a name
+            accepted by `Environment.from_name`.
+        task: Task forwarded to `Environment.from_name` when resolving by name.
+
+    Returns:
+        The resolved environment.
+
+    Raises:
+        ValueError: If the environment cannot be resolved. The message lists
+            `Environment.available()`, and the error raised by the lookup (if
+            any) is attached as the cause.
+    """
+    if isinstance(environment, Environment):
+        return environment
+
+    # Remember why the lookup failed so the final error does not hide it.
+    cause: ValueError | None = None
+    if isinstance(environment, str):
+        try:
+            return Environment.from_name(environment, task=task)
+        except ValueError as exc:
+            cause = exc
+
+    raise ValueError(
+        f"Could not resolve environment: {environment}. Available environments: {Environment.available()}"
+    ) from cause
+
+
+async def main(
+    task: str,
+    environment: Environment | str,
+    agent: Agent | str | PathLike = "SimpleAgent",
+):
+    """Resolve the agent, then sample trajectories from the environment with terminal logging."""
+    resolved_agent = agent_factory(agent)
+
+    def make_environment() -> Environment:
+        # Resolved lazily, each time the rollout manager asks for an environment.
+        return environment_factory(environment, task)
+
+    rollout_manager = RolloutManager(
+        agent=resolved_agent, callbacks=[TerminalLoggingCallback()]
+    )
+    await rollout_manager.sample_trajectories(environment_factory=make_environment)
+
+
+if __name__ == "__main__":
+    # Command-line entry point: a positional task plus --env and --agent options.
+    cli = argparse.ArgumentParser()
+    cli.add_argument("task", help="Task to prompt environment with.")
+    cli.add_argument(
+        "--env", help="Environment to sample trajectories from.", required=True
+    )
+    cli.add_argument(
+        "--agent", help="Agent to sample trajectories with.", default="SimpleAgent"
+    )
+    cli_args = cli.parse_args()
+
+    asyncio.run(
+        main(task=cli_args.task, environment=cli_args.env, agent=cli_args.agent)
+    )
diff --git a/pyproject.toml b/pyproject.toml
@@ -17,7 +17,7 @@ classifiers = [
 dependencies = [
     "aiofiles",
     "dm-tree",
-    "fhaviary>=0.6",  # For MalformedMessageError
+    "fhaviary>=0.8",  # For from_task
     "httpx",
     "litellm>=1.40.15",  # For LITELLM_LOG addition
     "networkx[default]~=3.4",  # Pin for pydot fix
@@ -317,6 +317,7 @@ ignore = [
     "ARG003",  # Thrown all the time when we are subclassing
     "ASYNC109",  # Buggy, SEE: https://github.com/astral-sh/ruff/issues/12353
     "ASYNC2",  # It's ok to mix async and sync ops (like opening a file)
+    "B904",  # Allow `raise` inside `except` without an explicit `from`
     "BLE001",  # Don't care to enforce blind exception catching
     "COM812",  # Trailing comma with black leads to wasting lines
     "D100",  # D100, D101, D102, D103, D104, D105, D106, D107: don't always need docstrings