# tme_3_sarsa_gridworld.py: SARSA with decaying epsilon-greedy exploration on gridworld-v0.
import gym
from torch.utils.tensorboard import SummaryWriter

from agent.q_learning import QLearning, EpsilonGreedy, EpsilonGreedyDecay, SARSA
from experiment import Experiment
import gridworld  # Imported for its side effect: registering gridworld-v0 with gym.
from logger import get_logger

number_of_episodes = 3000
max_steps = 1000  # Hard cap on steps per episode.
show_every = 1000  # Render and log to console every `show_every` episodes.
pause = 0.1  # Seconds to pause between rendered frames.
plan = 2  # Which gridworld map to load.

if __name__ == "__main__":
    env = gym.make("gridworld-v0")
    # Load the map and set the reward attached to each cell category.
    env.setPlan(f"gridworldPlans/plan{plan}.txt", {0: -0.001, 3: 1, 4: 1, 5: -1, 6: -1})
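    # Presumably (usual gridworld conventions, not verified against this
    # env's source): 0 is the small per-step cost on ordinary cells, 3 and 4
    # are rewarding goal cells, and 5 and 6 are penalty cells.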
    obs = env.reset()

    # Create a new agent, wrapped in an Experiment for bookkeeping.
    experiment = Experiment.create(
        base_name=f"q_learning/sarsa_decay_gridworld-v0-plan{plan}",
        model_class=QLearning,
        hp={
            "env": env,
            "action_space": range(env.action_space.n),
            "observation_shape": obs,
            # "action_strategy": EpsilonGreedy(epsilon=0.1),
            "action_strategy": EpsilonGreedyDecay(epsilon=0.1, alpha=1000),
"update_strategy": SARSA(alpha=0.3, gamma=0.9),
"action_to_str": {0: "south", 1: "north", 2: "west", 3: "east"},
},
)
experiment.save()
logger = get_logger(experiment.name, file_path=experiment.log_path)
writer = SummaryWriter(
log_dir=experiment.writer_path, purge_step=experiment.episode
)
experiment.info(logger)

    while experiment.episode < number_of_episodes:
        show = (experiment.episode + 1) % show_every == 0
        # Reset the environment and the agent for a fresh episode.
        experiment.model.reset()
        done = False
        episode_reward = 0
        while experiment.model.t < max_steps and not done:
            if show:
                env.render(pause=pause)
            reward, done, _ = experiment.model.step()
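            # step() presumably selects an action via the action strategy,
            # advances the env, and applies the SARSA update; the exact
            # contract lives in agent.q_learning.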
            episode_reward += reward
        experiment.episode += 1

        # Log progress: console only on shown episodes, TensorBoard every episode.
        if show:
            logger.info(f"Episode {experiment.episode}: reward = {episode_reward}.")
        writer.add_scalars(
            "train",
            {"reward": episode_reward, "steps": experiment.model.t},
            global_step=experiment.episode,
        )
    env.close()
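
# Training curves are written under experiment.writer_path; view them with
# `tensorboard --logdir <experiment.writer_path>`.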