From c19038a32b41abf4ba25172752cfe1d1ad056313 Mon Sep 17 00:00:00 2001
From: Jinyu Wang
Date: Fri, 27 Oct 2023 13:57:50 +0800
Subject: [PATCH] fix env_sampler eval info list issue

---
 maro/rl/rollout/env_sampler.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/maro/rl/rollout/env_sampler.py b/maro/rl/rollout/env_sampler.py
index 2825ef4aa..0dc311e8c 100644
--- a/maro/rl/rollout/env_sampler.py
+++ b/maro/rl/rollout/env_sampler.py
@@ -533,7 +533,7 @@ def sample(
 
         return {
             "experiences": [total_experiences],
-            "info": [deepcopy(self._info)],  # TODO: may have overhead issues. Leave to future work.
+            "info": [deepcopy(self._info)],
         }
 
     def set_policy_state(self, policy_state_dict: Dict[str, dict]) -> None:
@@ -592,7 +592,7 @@ def eval(self, policy_state: Dict[str, Dict[str, Any]] = None, num_episodes: int
                 self._step(list(env_action_dict.values()))
                 cache_element.next_state = self._state
 
-                if self._reward_eval_delay is None:  # TODO: necessary to calculate reward in eval()?
+                if self._reward_eval_delay is None:
                     self._calc_reward(cache_element)
                     self._post_eval_step(cache_element)
@@ -606,7 +606,7 @@ def eval(self, policy_state: Dict[str, Dict[str, Any]] = None, num_episodes: int
                 self._calc_reward(cache_element)
                 self._post_eval_step(cache_element)
 
-            info_list.append(self._info)
+            info_list.append(deepcopy(self._info))
 
         return {"info": info_list}
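
Note on the last hunk: the deepcopy matters because self._info is a single dict that the sampler appears to mutate in place across episodes (otherwise the copy would be unnecessary), so appending it directly would make every entry of info_list alias the final episode's stats. A minimal standalone sketch of the aliasing bug and the fix; the info dict, its "total_reward" field, and the reward values here are hypothetical stand-ins for illustration, not MARO's actual fields:

    from copy import deepcopy

    info = {"total_reward": 0.0}  # stand-in for the sampler's self._info
    buggy, fixed = [], []

    for episode_reward in (1.0, 2.0, 3.0):
        info["total_reward"] = episode_reward  # mutated in place each episode
        buggy.append(info)                     # stores a reference to the same dict
        fixed.append(deepcopy(info))           # snapshots this episode's values

    print(buggy)  # [{'total_reward': 3.0}, {'total_reward': 3.0}, {'total_reward': 3.0}]
    print(fixed)  # [{'total_reward': 1.0}, {'total_reward': 2.0}, {'total_reward': 3.0}]

This reading also fits the first hunk: sample() already deepcopied its info, and dropping the "may have overhead issues" TODO suggests the copy is now accepted as the intended behavior, with eval() brought in line with it.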