feature(nyz): add new gym hybrid viz (#563)
* feature(nyz): add new gym hybrid viz

* fix(nyz): fix yapf style
PaParaZz1 authored Jan 2, 2023
1 parent 29a3378 commit 0a25e46
Showing 12 changed files with 55 additions and 101 deletions.
4 changes: 3 additions & 1 deletion .gitignore
@@ -124,6 +124,8 @@ local.properties

# Graphics Interchange Format
*.gif
*.mp4
*.mpg

# RAW
*.raw
@@ -1425,4 +1427,4 @@ collect_demo_data_config.py
!ding/**/*.py
events.*

evogym/*
evogym/*
4 changes: 2 additions & 2 deletions ding/worker/collector/interaction_serial_evaluator.py
@@ -251,7 +251,7 @@ def eval(
eval_monitor.update_reward(env_id, reward)
return_info.append(t.info)
self._logger.info(
"[EVALUATOR]env {} finish episode, final reward: {}, current episode: {}".format(
"[EVALUATOR]env {} finish episode, final reward: {:.4f}, current episode: {}".format(
env_id, eval_monitor.get_latest_reward(env_id), eval_monitor.get_current_episode()
)
)
@@ -301,7 +301,7 @@ def eval(
stop_flag = episode_return >= self._stop_value and train_iter > 0
if stop_flag:
self._logger.info(
"[DI-engine serial pipeline] " + "Current episode_return: {} is greater than stop_value: {}".
"[DI-engine serial pipeline] " + "Current episode_return: {:.4f} is greater than stop_value: {}".
format(episode_return, self._stop_value) + ", so your RL agent is converged, you can refer to " +
"'log/evaluator/evaluator_logger.txt' for details."
)
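Note: the only change in the two log statements above is the float format specifier. A minimal standalone sketch of the difference (plain Python; the value is illustrative):

    # '{:.4f}' rounds the printed return to four decimal places instead of full precision.
    episode_return = 1.2345678
    print("final reward: {}".format(episode_return))      # final reward: 1.2345678
    print("final reward: {:.4f}".format(episode_return))  # final reward: 1.2346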
4 changes: 1 addition & 3 deletions dizoo/gym_hybrid/config/gym_hybrid_ddpg_config.py
@@ -10,8 +10,6 @@
env_id='Moving-v0', # ['Sliding-v0', 'Moving-v0']
n_evaluator_episode=5,
stop_value=1.8,
save_replay_gif=False,
replay_path_gif=None,
),
policy=dict(
cuda=True,
@@ -70,4 +68,4 @@
if __name__ == "__main__":
# or you can enter `ding -m serial -c gym_hybrid_ddpg_config.py -s 0`
from ding.entry import serial_pipeline
serial_pipeline([main_config, create_config], seed=0)
serial_pipeline([main_config, create_config], seed=0, max_env_step=int(1e7))
9 changes: 2 additions & 7 deletions dizoo/gym_hybrid/config/gym_hybrid_hppo_config.py
@@ -10,12 +10,9 @@
env_id='Moving-v0', # ['Sliding-v0', 'Moving-v0']
n_evaluator_episode=5,
stop_value=1.8,
save_replay_gif=False,
replay_path_gif=None,
),
policy=dict(
cuda=True,
priority=False,
action_space='hybrid',
recompute_adv=True,
model=dict(
@@ -34,14 +31,12 @@
epoch_per_collect=10,
batch_size=320,
learning_rate=3e-4,
value_weight=0.5,
entropy_weight=0.03,
clip_ratio=0.2,
adv_norm=True,
value_norm=True,
),
collect=dict(
n_sample=int(3200),
n_sample=3200,
discount_factor=0.99,
gae_lambda=0.95,
collector=dict(collect_print_freq=1000, ),
@@ -66,4 +61,4 @@
if __name__ == "__main__":
# or you can enter `ding -m serial -c gym_hybrid_hppo_config.py -s 0`
from ding.entry import serial_pipeline_onpolicy
serial_pipeline_onpolicy([main_config, create_config], seed=0)
serial_pipeline_onpolicy([main_config, create_config], seed=0, max_env_step=int(1e7))
7 changes: 1 addition & 6 deletions dizoo/gym_hybrid/config/gym_hybrid_mpdqn_config.py
@@ -10,14 +10,9 @@
env_id='Moving-v0', # ['Sliding-v0', 'Moving-v0']
n_evaluator_episode=5,
stop_value=1.8,
save_replay_gif=False,
replay_path_gif=None,
),
policy=dict(
cuda=True,
priority=False,
# (bool) Whether use Importance Sampling Weight to correct biased update. If True, priority must be True.
priority_IS_weight=False,
discount_factor=0.99,
nstep=1,
model=dict(
@@ -85,4 +80,4 @@
if __name__ == "__main__":
# or you can enter `ding -m serial -c gym_hybrid_mpdqn_config.py -s 0`
from ding.entry import serial_pipeline
serial_pipeline([main_config, create_config], seed=0)
serial_pipeline([main_config, create_config], seed=0, max_env_step=int(1e7))
7 changes: 1 addition & 6 deletions dizoo/gym_hybrid/config/gym_hybrid_pdqn_config.py
@@ -10,14 +10,9 @@
env_id='Moving-v0', # ['Sliding-v0', 'Moving-v0']
n_evaluator_episode=5,
stop_value=1.8,
save_replay_gif=False,
replay_path_gif=None,
),
policy=dict(
cuda=True,
priority=False,
# (bool) Whether use Importance Sampling Weight to correct biased update. If True, priority must be True.
priority_IS_weight=False,
discount_factor=0.99,
nstep=1,
model=dict(
@@ -83,4 +78,4 @@
if __name__ == "__main__":
# or you can enter `ding -m serial -c gym_hybrid_pdqn_config.py -s 0`
from ding.entry import serial_pipeline
serial_pipeline([main_config, create_config], seed=0)
serial_pipeline([main_config, create_config], seed=0, max_env_step=int(1e7))
24 changes: 7 additions & 17 deletions dizoo/gym_hybrid/entry/gym_hybrid_ddpg_eval.py
@@ -8,7 +8,7 @@
from ding.config import compile_config
from ding.worker import BaseLearner, SampleSerialCollector, InteractionSerialEvaluator, AdvancedReplayBuffer
from ding.envs import BaseEnvManager, DingEnvWrapper
from ding.envs import get_vec_env_setting, create_env_manager
from ding.envs import get_vec_env_setting
from ding.policy import DDPGPolicy
from ding.model import QAC
from ding.utils import set_pkg_seed
@@ -17,24 +17,14 @@


def main(main_cfg, create_cfg, seed=0):
cfg = compile_config(
main_cfg,
BaseEnvManager,
DDPGPolicy,
BaseLearner,
SampleSerialCollector,
InteractionSerialEvaluator,
AdvancedReplayBuffer,
create_cfg=create_cfg,
save_cfg=True
)

create_cfg.policy.type = create_cfg.policy.type + '_command'
env_fn = None
cfg = compile_config(cfg, seed=seed, env=env_fn, auto=True, create_cfg=create_cfg, save_cfg=True)
# Specify evaluation arguments
main_cfg.policy.load_path = './ckpt_best.pth.tar'
main_cfg.env.replay_path = './'
main_cfg.env.evaluator_env_num = 1 # only 1 env for save replay
cfg = compile_config(main_cfg, seed=seed, auto=True, create_cfg=create_cfg, save_cfg=True)
# Create main components: env, policy
env_fn, collector_env_cfg, evaluator_env_cfg = get_vec_env_setting(cfg.env)
evaluator_env = create_env_manager(cfg.env.manager, [partial(env_fn, cfg=c) for c in evaluator_env_cfg])
evaluator_env = BaseEnvManager([partial(env_fn, cfg=c) for c in evaluator_env_cfg], cfg.env.manager)

evaluator_env.enable_save_replay(cfg.env.replay_path) # switch save replay interface

Binary file not shown.
70 changes: 28 additions & 42 deletions dizoo/gym_hybrid/envs/gym-hybrid/gym_hybrid/environments.py
@@ -4,10 +4,12 @@

import gym
import numpy as np
import cv2
import os
from gym import spaces
from gym.utils import seeding

gym.logger.set_level(40) # noqa
# gym.logger.set_level(40) # noqa

from .agents import BaseAgent, MovingAgent, SlidingAgent, HardMoveAgent

@@ -99,6 +101,12 @@ def __init__(

self.action_space = spaces.Tuple((spaces.Discrete(3), spaces.Box(parameters_min, parameters_max)))
self.observation_space = spaces.Box(np.ones(10), -np.ones(10))
dirname = os.path.dirname(__file__)
self.bg = cv2.imread(os.path.join(dirname, 'bg.jpg'))
self.bg = cv2.cvtColor(self.bg, cv2.COLOR_BGR2RGB)
self.bg = cv2.resize(self.bg, (800, 800))
self.target_img = cv2.imread(os.path.join(dirname, 'target.png'), cv2.IMREAD_UNCHANGED)
self.target_img = cv2.resize(self.target_img, (60, 60))

def seed(self, seed: Optional[int] = None) -> list:
self.np_random, seed = seeding.np_random(seed) # noqa
@@ -192,16 +200,32 @@ def render(self, mode='human'):
arrow.set_color(0, 0, 0)
self.viewer.add_geom(arrow)

target = rendering.make_circle(unit_x * self.target_radius)
target = rendering.make_circle(unit_x * self.target_radius, filled=False)
target_trans = rendering.Transform(translation=(unit_x * (1 + self.target.x), unit_y * (1 + self.target.y)))
target.add_attr(target_trans)
target.set_color(1, 0.5, 0.5)
target.set_color(0, 0.6, 0)
self.viewer.add_geom(target)

self.arrow_trans.set_rotation(self.agent.theta)
self.agent_trans.set_translation(unit_x * (1 + self.agent.x), unit_y * (1 + self.agent.y))

return self.viewer.render(return_rgb_array=mode == 'rgb_array')
ret = self.viewer.render(return_rgb_array=mode == 'rgb_array')
# add background
ret = np.where(ret == 255, self.bg, ret)
# add target logo
# # x, y = int(unit_x * (1 + self.target.x)), int(unit_y * (1 - self.target.y))
# # x, y = x - 20, y + 25 # seed0
# target_area = ret[x:x+60, y:y+60]
# rgb_img = cv2.cvtColor(self.target_img[..., :3], cv2.COLOR_BGR2RGB)
# target_area = np.where(self.target_img[..., -1:] == 0, target_area, rgb_img)
# ret[x:x+60, y:y+60] = target_area
# add frame
frames = np.array([60, 60, 30]).reshape(1, 1, -1)
ret[:6] = frames
ret[:, :6] = frames
ret[-6:] = frames
ret[:, -6:] = frames
return ret

def close(self):
if self.viewer:
@@ -375,44 +399,6 @@ def distance(self) -> float:
def get_distance(x1: float, y1: float, x2: float, y2: float) -> float:
return np.sqrt(((x1 - x2) ** 2) + ((y1 - y2) ** 2))

def render(self, mode='human'):
screen_width = 400
screen_height = 400
unit_x = screen_width / 2
unit_y = screen_height / 2
agent_radius = 0.05

if self.viewer is None:
from gym.envs.classic_control import rendering
self.viewer = rendering.Viewer(screen_width, screen_height)

agent = rendering.make_circle(unit_x * agent_radius)
self.agent_trans = rendering.Transform(
translation=(unit_x * (1 + self.agent.x), unit_y * (1 + self.agent.y))
) # noqa
agent.add_attr(self.agent_trans)
agent.set_color(0.1, 0.3, 0.9)
self.viewer.add_geom(agent)

t, r, m = 0.1 * unit_x, 0.04 * unit_y, 0.06 * unit_x
arrow = rendering.FilledPolygon([(t, 0), (m, r), (m, -r)])
self.arrow_trans = rendering.Transform(rotation=self.agent.theta) # noqa
arrow.add_attr(self.arrow_trans)
arrow.add_attr(self.agent_trans)
arrow.set_color(0, 0, 0)
self.viewer.add_geom(arrow)

target = rendering.make_circle(unit_x * self.target_radius)
target_trans = rendering.Transform(translation=(unit_x * (1 + self.target.x), unit_y * (1 + self.target.y)))
target.add_attr(target_trans)
target.set_color(1, 0.5, 0.5)
self.viewer.add_geom(target)

self.arrow_trans.set_rotation(self.agent.theta)
self.agent_trans.set_translation(unit_x * (1 + self.agent.x), unit_y * (1 + self.agent.y))

return self.viewer.render(return_rgb_array=mode == 'rgb_array')

def close(self):
if self.viewer:
self.viewer.close()
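Note: the new render() above composites a background image by replacing the viewer's pure-white pixels and then paints a 6-pixel border. A minimal NumPy-only sketch of that compositing idea (the placeholder arrays stand in for the real frame and background assets):

    import numpy as np

    # 'frame' stands in for the RGB array returned by viewer.render(return_rgb_array=True),
    # 'bg' for a background image resized to the same shape (both placeholders here).
    frame = np.full((800, 800, 3), 255, dtype=np.uint8)  # all-white canvas
    bg = np.zeros((800, 800, 3), dtype=np.uint8)         # black background placeholder

    # Wherever the canvas is still pure white, show the background instead.
    composited = np.where(frame == 255, bg, frame)

    # Paint a 6-pixel border with a fixed color, as in the commit.
    border = np.array([60, 60, 30], dtype=np.uint8).reshape(1, 1, -1)
    composited[:6] = border
    composited[:, :6] = border
    composited[-6:] = border
    composited[:, -6:] = border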
Binary file not shown.
27 changes: 10 additions & 17 deletions dizoo/gym_hybrid/envs/gym_hybrid_env.py
@@ -26,23 +26,19 @@ def default_config(cls: type) -> EasyDict:
return cfg

config = dict(
replay_path=None,
save_replay_gif=False,
replay_path_gif=None,
env_id='Moving-v0',
act_scale=True,
)

def __init__(self, cfg: EasyDict) -> None:
self._cfg = cfg
self._env_id = cfg.env_id
assert self._env_id in self.default_env_id
self._act_scale = cfg.act_scale
self._init_flag = False
self._replay_path = cfg.replay_path
self._save_replay_gif = cfg.save_replay_gif
self._replay_path_gif = cfg.replay_path_gif
self._replay_path = None
self._save_replay = False
self._save_replay_count = 0
if self._save_replay_gif:
self._frames = []
self._init_flag = False

def reset(self) -> np.ndarray:
if not self._init_flag:
@@ -89,19 +85,20 @@ def step(self, action: Dict) -> BaseEnvTimestep:
# we have already done the clip(-1,1) operation
action['action_args'][1] = affine_transform(action['action_args'][1], min_val=-1, max_val=1)
action = [action['action_type'], action['action_args']]
if self._save_replay_gif:
if self._save_replay:
self._frames.append(self._env.render(mode='rgb_array'))
obs, rew, done, info = self._env.step(action)
self._eval_episode_return += rew
if done:
info['eval_episode_return'] = self._eval_episode_return
if self._save_replay_gif:
if self._save_replay:
if self._env_id == 'HardMove-v0':
self._env_id = f'hardmove_n{self._cfg.num_actuators}'
path = os.path.join(
self._replay_path, '{}_episode_{}.gif'.format(self._env_id, self._save_replay_count)
)
self.display_frames_as_gif(self._frames, path)
self._frames = []
self._save_replay_count += 1

obs = to_ndarray(obs)
@@ -131,11 +128,6 @@ def random_action(self) -> Dict:
def __repr__(self) -> str:
return "DI-engine gym hybrid Env"

def enable_save_replay(self, replay_path: Optional[str] = None) -> None:
if replay_path is None:
replay_path = './video'
self._replay_path = replay_path

@property
def observation_space(self) -> gym.spaces.Space:
return self._observation_space
@@ -151,9 +143,10 @@ def reward_space(self) -> gym.spaces.Space:
def enable_save_replay(self, replay_path: Optional[str] = None) -> None:
if replay_path is None:
replay_path = './video'
self._save_replay = True
self._replay_path = replay_path
self._save_replay = True
self._save_replay_count = 0
self._frames = []

@staticmethod
def display_frames_as_gif(frames: list, path: str) -> None:
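Note: with this change the env no longer reads save_replay_gif/replay_path_gif from its config; replay recording is switched on through enable_save_replay(), which collects frames during step() and writes a GIF when an episode ends. A hedged usage sketch (the class name and the minimal config keys are assumptions inferred from this diff):

    from easydict import EasyDict
    from dizoo.gym_hybrid.envs.gym_hybrid_env import GymHybridEnv  # assumed class name

    env = GymHybridEnv(EasyDict(env_id='Moving-v0', act_scale=True))
    env.enable_save_replay(replay_path='./video')  # enable frame collection; a GIF is written per episode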
Binary file modified dizoo/gym_hybrid/moving_v0.gif
