# continuous_control_DDPG.py
# Train a DDPG agent on the Unity ML-Agents Reacher (continuous control) environment.
import torch
import numpy as np
import pandas as pd
from collections import deque
from unityagents import UnityEnvironment
import random
import matplotlib.pyplot as plt
from ddpg_agent import Agent
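
# Optional, minimal sketch: seed the global RNGs so runs are repeatable. The
# Agent is seeded separately via its random_seed argument below; SEED here is
# a hypothetical constant chosen to match that value.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)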


def plot_scores(scores, rolling_window=10, save_fig=False):
    """Plot scores and an optional rolling mean using the specified window."""
    fig = plt.figure()
    ax = fig.add_subplot(111)
    plt.plot(np.arange(len(scores)), scores)
    plt.ylabel('Score')
    plt.xlabel('Episode #')
    plt.title('Scores')
    rolling_mean = pd.Series(scores).rolling(rolling_window).mean()
    plt.plot(rolling_mean)
    if save_fig:
        plt.savefig('figures_scores.png', bbox_inches='tight', pad_inches=0)
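
# Example usage after training (hypothetical call; `scores` is produced by ddpg() below):
#   plot_scores(scores, rolling_window=100, save_fig=True)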


def ddpg(n_episodes=3000, print_every=100):
    """Run DDPG training and return the list of per-episode scores."""
    scores_deque = deque(maxlen=print_every)
    scores = []
    for i_episode in range(1, n_episodes + 1):
        env_info = env.reset(train_mode=True)[brain_name]
        state = env_info.vector_observations[0]
        agent.reset()
        score = 0
        while True:
            action = agent.act(state)
            env_info = env.step(action)[brain_name]
            next_state = env_info.vector_observations[0]
            reward = env_info.rewards[0]       # reward for this step (single-agent version)
            done = env_info.local_done[0]      # whether the episode has finished
            agent.step(state, action, reward, next_state, done)
            score += reward                    # update the episode score
            state = next_state                 # roll over the state to the next time step
            if done:                           # exit loop once the episode finishes
                break

        # Save the score and compute the average over the last `print_every` episodes
        scores_deque.append(score)
        scores.append(score)
        avg_score = np.mean(scores_deque)
        print('\rEpisode {}\tAverage Score: {:.2f}\tScore: {:.2f}'.format(i_episode, avg_score, score), end="")
        if i_episode % print_every == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, avg_score))
            torch.save(agent.actor_local.state_dict(), 'checkpoint_actor.pth')
            torch.save(agent.critic_local.state_dict(), 'checkpoint_critic.pth')
        # Early stop: the environment counts as solved once the average score
        # over the last 100 episodes reaches +30
        if len(scores_deque) == scores_deque.maxlen and avg_score >= 30.0:
            print('\nEnvironment solved in {} episodes with an Average Score of {:.2f}'.format(i_episode, avg_score))
            torch.save(agent.actor_local.state_dict(), 'checkpoint_actor.pth')
            torch.save(agent.critic_local.state_dict(), 'checkpoint_critic.pth')
            return scores
    return scores
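

# Optional evaluation sketch, not invoked during training: reload the saved
# checkpoints and run a single episode greedily. Assumes agent.act accepts an
# add_noise flag to disable exploration noise (common in DDPG implementations);
# if the ddpg_agent in this repo does not, call agent.act(state) instead.
def evaluate(max_steps=1000):
    agent.actor_local.load_state_dict(torch.load('checkpoint_actor.pth'))
    agent.critic_local.load_state_dict(torch.load('checkpoint_critic.pth'))
    env_info = env.reset(train_mode=False)[brain_name]
    state = env_info.vector_observations[0]
    score = 0
    for _ in range(max_steps):
        action = agent.act(state, add_noise=False)  # assumption: add_noise is supported
        env_info = env.step(action)[brain_name]
        state = env_info.vector_observations[0]
        score += env_info.rewards[0]
        if env_info.local_done[0]:
            break
    print('Evaluation score: {:.2f}'.format(score))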

# Select this option to load version 1 (with a single agent) of the environment
# env = UnityEnvironment(file_name='./Reacher_Linux/Reacher.x86_64')
env = UnityEnvironment(file_name='./Reacher_Linux/Reacher_One_Linux_NoVis/Reacher_One_Linux_NoVis.x86_64')

# Select this option to load version 2 (with 20 agents) of the environment
# env = UnityEnvironment(file_name='./Reacher_Linux/Reacher_Linux_NoVis/Reacher.x86_64')
# env = UnityEnvironment(file_name='/data/Reacher_Linux_NoVis/Reacher.x86_64')
# get the default brain
brain_name = env.brain_names[0]
brain = env.brains[brain_name]
# reset the environment
env_info = env.reset(train_mode=True)[brain_name]
# number of agents
num_agents = len(env_info.agents)
print('Number of agents:', num_agents)
# size of each action
action_size = brain.vector_action_space_size
print('Size of each action:', action_size)
# examine the state space
states = env_info.vector_observations
state_size = states.shape[1]
print('There are {} agents. Each observes a state with length: {}'.format(states.shape[0], state_size))
print('The state for the first agent looks like:', states[0])
agent = Agent(state_size=state_size, action_size=action_size,
              num_agents=num_agents, random_seed=42)
scores = ddpg()
# Plot the per-episode scores with a 100-episode rolling mean
plot_scores(scores, rolling_window=100)
plt.show()

env.close()