diff --git a/README.md b/README.md
index 127b15745c..dcb8d46c3b 100644
--- a/README.md
+++ b/README.md
@@ -283,6 +283,7 @@ P.S: The `.py` file in `Runnable Demo` can be found in `dizoo`
| 31 |[gym-pybullet-drones](https://github.com/utiasDSL/gym-pybullet-drones) | ![continuous](https://img.shields.io/badge/-continuous-green) | ![original](./dizoo/gym-pybullet-drones/gym-pybullet-drones.gif) | [dizoo link](https://github.com/opendilab/DI-engine/tree/main/dizoo/gym_pybullet_drones/envs) Env Guide |
| 32 |[beergame](https://github.com/OptMLGroup/DeepBeerInventory-RL) | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) | ![original](./dizoo/beergame/beergame.png) | [dizoo link](https://github.com/opendilab/DI-engine/tree/main/dizoo/beergame/envs) Env Guide |
| 33 |[classic_control/acrobot](https://github.com/openai/gym/tree/master/gym/envs/classic_control) | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) | ![original](./dizoo/classic_control/acrobot/acrobot.gif) | [dizoo link](https://github.com/opendilab/DI-engine/tree/main/dizoo/classic_control/acrobot/envs) Env Guide |
+| 34 |[box2d/car_racing](https://github.com/openai/gym/blob/master/gym/envs/box2d/car_racing.py) | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) ![continuous](https://img.shields.io/badge/-continuous-green) | ![original](./dizoo/box2d/carracing/car_racing.gif) | [dizoo link](https://github.com/opendilab/DI-engine/tree/main/dizoo/box2d/carracing/envs) Env Guide |
![discrete](https://img.shields.io/badge/-discrete-brightgreen) means discrete action space
diff --git a/dizoo/box2d/carracing/__init__.py b/dizoo/box2d/carracing/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/dizoo/box2d/carracing/car_racing.gif b/dizoo/box2d/carracing/car_racing.gif
new file mode 100644
index 0000000000..5d3bdd38e9
Binary files /dev/null and b/dizoo/box2d/carracing/car_racing.gif differ
diff --git a/dizoo/box2d/carracing/config/__init__.py b/dizoo/box2d/carracing/config/__init__.py
new file mode 100644
index 0000000000..1571e58a64
--- /dev/null
+++ b/dizoo/box2d/carracing/config/__init__.py
@@ -0,0 +1 @@
+from .carracing_dqn_config import carracing_dqn_config, carracing_dqn_create_config
diff --git a/dizoo/box2d/carracing/config/carracing_dqn_config.py b/dizoo/box2d/carracing/config/carracing_dqn_config.py
new file mode 100644
index 0000000000..31dd42fca8
--- /dev/null
+++ b/dizoo/box2d/carracing/config/carracing_dqn_config.py
@@ -0,0 +1,63 @@
+from easydict import EasyDict
+
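+# length (in steps) of the n-step return used for DQN's TD target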
+nstep = 3
+carracing_dqn_config = dict(
+ exp_name='carracing_dqn_seed0',
+ env=dict(
+ collector_env_num=8,
+ evaluator_env_num=8,
+ env_id='CarRacing-v2',
+ continuous=False,
+ n_evaluator_episode=8,
+ stop_value=900,
+ # replay_path='./carracing_dqn_seed0/video',
+ ),
+ policy=dict(
+ cuda=True,
+ # load_path='carracing_dqn_seed0/ckpt/ckpt_best.pth.tar',
+ model=dict(
+ obs_shape=[3, 96, 96],
+ action_shape=5,
+ encoder_hidden_size_list=[64, 64, 128],
+ dueling=True,
+ ),
+ discount_factor=0.99,
+ nstep=nstep,
+ learn=dict(
+ update_per_collect=10,
+ batch_size=64,
+ learning_rate=0.0001,
+ target_update_freq=100,
+ ),
+ collect=dict(
+ n_sample=64,
+ ),
+ other=dict(
+ eps=dict(
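+                # epsilon for epsilon-greedy exploration decays exponentially
+                # from 0.95 to 0.1 over roughly 50000 collector env steps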
+ type='exp',
+ start=0.95,
+ end=0.1,
+ decay=50000,
+ ),
+ replay_buffer=dict(replay_buffer_size=100000, )
+ ),
+ ),
+)
+carracing_dqn_config = EasyDict(carracing_dqn_config)
+main_config = carracing_dqn_config
+
+carracing_dqn_create_config = dict(
+ env=dict(
+ type='carracing',
+ import_names=['dizoo.box2d.carracing.envs.carracing_env'],
+ ),
+ env_manager=dict(type='subprocess'),
+ policy=dict(type='dqn'),
+)
+carracing_dqn_create_config = EasyDict(carracing_dqn_create_config)
+create_config = carracing_dqn_create_config
+
+if __name__ == "__main__":
+    # or you can run: `ding -m serial -c carracing_dqn_config.py -s 0`
+ from ding.entry import serial_pipeline
+ serial_pipeline([main_config, create_config], seed=0)
\ No newline at end of file
diff --git a/dizoo/box2d/carracing/envs/__init__.py b/dizoo/box2d/carracing/envs/__init__.py
new file mode 100644
index 0000000000..a36760ccf7
--- /dev/null
+++ b/dizoo/box2d/carracing/envs/__init__.py
@@ -0,0 +1 @@
+from .carracing_env import CarRacingEnv
diff --git a/dizoo/box2d/carracing/envs/carracing_env.py b/dizoo/box2d/carracing/envs/carracing_env.py
new file mode 100644
index 0000000000..39b82a2502
--- /dev/null
+++ b/dizoo/box2d/carracing/envs/carracing_env.py
@@ -0,0 +1,161 @@
+from typing import Optional
+import copy
+import os
+
+import gym
+import numpy as np
+from easydict import EasyDict
+
+from ding.envs import BaseEnv, BaseEnvTimestep
+from ding.envs import ObsPlusPrevActRewWrapper
+from ding.envs.common import affine_transform, save_frames_as_gif
+from ding.torch_utils import to_ndarray
+from ding.utils import ENV_REGISTRY
+
+
+@ENV_REGISTRY.register('carracing')
+class CarRacingEnv(BaseEnv):
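+    """
+    DI-engine wrapper of gym's CarRacing environment (e.g. ``CarRacing-v2``).
+    Pixel observations are rescaled to [0, 1] and transposed to channel-first
+    (C, H, W); scalar rewards are wrapped into float32 arrays of shape (1, ).
+    """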
+
+ config = dict(
+ replay_path=None,
+ save_replay_gif=False,
+ replay_path_gif=None,
+ action_clip=False,
+ )
+
+ @classmethod
+ def default_config(cls: type) -> EasyDict:
+ cfg = EasyDict(copy.deepcopy(cls.config))
+ cfg.cfg_type = cls.__name__ + 'Dict'
+ return cfg
+
+ def __init__(self, cfg: dict) -> None:
+ self._cfg = cfg
+ self._init_flag = False
+        # e.g. env_id: 'CarRacing-v2'
+ self._env_id = cfg.env_id
+ self._replay_path = None
+ self._replay_path_gif = cfg.replay_path_gif
+ self._save_replay_gif = cfg.save_replay_gif
+ self._save_replay_count = 0
+ if cfg.continuous:
+ self._act_scale = cfg.act_scale # act_scale only works in continuous env
+ self._action_clip = cfg.action_clip
+ else:
+ self._act_scale = False
+
+ def reset(self) -> np.ndarray:
+ if not self._init_flag:
+ self._env = gym.make(self._cfg.env_id, continuous=self._cfg.continuous)
+ if self._replay_path is not None:
+ self._env = gym.wrappers.RecordVideo(
+ self._env,
+ video_folder=self._replay_path,
+ episode_trigger=lambda episode_id: True,
+ name_prefix='rl-video-{}'.format(id(self))
+ )
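+            # The raw observation space is uint8 pixels in HWC layout with
+            # values in [0, 255]; expose a float32 CHW space scaled to [0, 1]
+            # to match the processing done in reset() and step().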
+ self._observation_space = gym.spaces.Box(
+ low=np.min(self._env.observation_space.low.astype(np.float32) / 255),
+ high=np.max(self._env.observation_space.high.astype(np.float32) / 255),
+ shape=(
+ self._env.observation_space.shape[2], self._env.observation_space.shape[0],
+ self._env.observation_space.shape[1]
+ ),
+ dtype=np.float32
+ )
+ self._action_space = self._env.action_space
+ self._reward_space = gym.spaces.Box(
+ low=self._env.reward_range[0], high=self._env.reward_range[1], shape=(1, ), dtype=np.float32
+ )
+ self._init_flag = True
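+        # NOTE: the seed()/reset()/step() calls below follow the pre-0.26 gym
+        # API; with gym>=0.26, seeding moves into env.reset(seed=...) and
+        # step() returns a 5-tuple (obs, reward, terminated, truncated, info).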
+ if hasattr(self, '_seed') and hasattr(self, '_dynamic_seed') and self._dynamic_seed:
+ np_seed = 100 * np.random.randint(1, 1000)
+ self._env.seed(self._seed + np_seed)
+ elif hasattr(self, '_seed'):
+ self._env.seed(self._seed)
+ self._eval_episode_return = 0
+ obs = self._env.reset()
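+        # rescale uint8 pixels to [0, 1] and convert HWC -> CHW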
+ obs = obs.astype(np.float32) / 255
+ obs = obs.transpose(2, 0, 1)
+ obs = to_ndarray(obs)
+ if self._save_replay_gif:
+ self._frames = []
+ return obs
+
+ def close(self) -> None:
+ if self._init_flag:
+ self._env.close()
+ self._init_flag = False
+
+ def render(self) -> None:
+ self._env.render()
+
+ def seed(self, seed: int, dynamic_seed: bool = True) -> None:
+ self._seed = seed
+ self._dynamic_seed = dynamic_seed
+ np.random.seed(self._seed)
+
+ def step(self, action: np.ndarray) -> BaseEnvTimestep:
+ assert isinstance(action, np.ndarray), type(action)
+        if action.shape == (1, ):
+            action = action.item()  # squeeze the single-element array to a python scalar
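+        # act_scale: affine-transform an action assumed to lie in [-1, 1] to
+        # [min_val, max_val], clipping first when action_clip is set.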
+ if self._act_scale:
+ action = affine_transform(action, action_clip=self._action_clip, min_val=-1, max_val=1)
+ if self._save_replay_gif:
+ self._frames.append(self._env.render(mode='rgb_array'))
+ obs, rew, done, info = self._env.step(action)
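+        # same observation post-processing as in reset()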
+ obs = obs.astype(np.float32) / 255
+ obs = obs.transpose(2, 0, 1)
+ self._eval_episode_return += rew
+ if done:
+ info['eval_episode_return'] = self._eval_episode_return
+ if self._save_replay_gif:
+ if not os.path.exists(self._replay_path_gif):
+ os.makedirs(self._replay_path_gif)
+ path = os.path.join(
+ self._replay_path_gif, '{}_episode_{}.gif'.format(self._env_id, self._save_replay_count)
+ )
+ save_frames_as_gif(self._frames, path)
+ self._save_replay_count += 1
+
+ obs = to_ndarray(obs)
+        rew = to_ndarray([rew]).astype(np.float32)  # wrap the scalar reward into an array with shape (1, )
+ return BaseEnvTimestep(obs, rew, done, info)
+
+ def enable_save_replay(self, replay_path: Optional[str] = None) -> None:
+ if replay_path is None:
+ replay_path = './video'
+ self._replay_path = replay_path
+ self._save_replay_gif = True
+ self._save_replay_count = 0
+        # Only wrap an already-created env here; otherwise reset() will apply
+        # the RecordVideo wrapper when the env is first created (wrapping an
+        # env that does not exist yet would raise, and wrapping twice records
+        # duplicate, meaningless videos).
+        if self._init_flag:
+            self._env = gym.wrappers.RecordVideo(
+                self._env,
+                video_folder=self._replay_path,
+                episode_trigger=lambda episode_id: True,
+                name_prefix='rl-video-{}'.format(id(self))
+            )
+
+    def random_action(self) -> np.ndarray:
+        random_action = self.action_space.sample()
+        if isinstance(random_action, (int, np.integer)):
+            # Discrete spaces may sample a python int or a numpy integer scalar;
+            # wrap it into a shape-(1, ) int64 array. Box samples are already ndarrays.
+            random_action = to_ndarray([random_action], dtype=np.int64)
+        return random_action
+
+ @property
+ def observation_space(self) -> gym.spaces.Space:
+ return self._observation_space
+
+ @property
+ def action_space(self) -> gym.spaces.Space:
+ return self._action_space
+
+ @property
+ def reward_space(self) -> gym.spaces.Space:
+ return self._reward_space
+
+ def __repr__(self) -> str:
+ return "DI-engine CarRacing Env"
diff --git a/dizoo/box2d/carracing/envs/test_carracing_env.py b/dizoo/box2d/carracing/envs/test_carracing_env.py
new file mode 100644
index 0000000000..7eb4a75039
--- /dev/null
+++ b/dizoo/box2d/carracing/envs/test_carracing_env.py
@@ -0,0 +1,36 @@
+import pytest
+import numpy as np
+from easydict import EasyDict
+from dizoo.box2d.carracing.envs import CarRacingEnv
+
+
+@pytest.mark.envtest
+@pytest.mark.parametrize(
+ 'cfg', [
+ EasyDict({
+ 'env_id': 'CarRacing-v2',
+ 'continuous': False,
+ 'act_scale': False
+ })
+ ]
+)
+class TestCarRacing:
+
+ def test_naive(self, cfg):
+ env = CarRacingEnv(cfg)
+ env.seed(314)
+ assert env._seed == 314
+ obs = env.reset()
+ assert obs.shape == (3, 96, 96)
+ for i in range(10):
+ random_action = env.random_action()
+ timestep = env.step(random_action)
+ print(timestep)
+ assert isinstance(timestep.obs, np.ndarray)
+ assert isinstance(timestep.done, bool)
+ assert timestep.obs.shape == (3, 96, 96)
+ assert timestep.reward.shape == (1, )
+ assert timestep.reward >= env.reward_space.low
+ assert timestep.reward <= env.reward_space.high
+ print(env.observation_space, env.action_space, env.reward_space)
+ env.close()
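+
+
+if __name__ == "__main__":
+    # Minimal direct-run sketch (an assumption, not required by the test suite):
+    # `python test_carracing_env.py`; under pytest, use `pytest -m envtest`.
+    TestCarRacing().test_naive(EasyDict({'env_id': 'CarRacing-v2', 'continuous': False, 'act_scale': False}))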