-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrandomAgent.py
68 lines (56 loc) · 2.2 KB
/
randomAgent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import matplotlib
matplotlib.use("TkAgg")
import gym
import gridworld
from gym import wrappers, logger
import numpy as np
import copy
class RandomAgent(object):
    """Baseline agent: ignores all feedback and samples actions uniformly at random."""

    def __init__(self, action_space):
        # Keep a handle on the environment's action space; act() samples from it.
        self.action_space = action_space

    def act(self, observation, reward, done):
        """Return the next action.

        The observation, reward and done flag are deliberately unused:
        this agent provides a random-policy baseline.
        """
        return self.action_space.sample()
if __name__ == '__main__':
    # Demo driver: inspect the gridworld MDP, then run a RandomAgent for a
    # number of episodes, logging results through a gym Monitor wrapper.
    env = gym.make("gridworld-v0")
    env.seed(0)  # seed the pseudo-random generator for reproducibility
    print(env.action_space)  # which actions are available
    # NOTE(review): step() is called here before any reset(); many gym envs
    # require reset() first — confirm gridworld-v0 tolerates this.
    print(env.step(1))  # take action 1; returns observation, reward, and done (episode finished or not)
    env.render()  # visualize the game grid
    env.render(mode="human")  # console visualization
    statedic, mdp = env.getMDP()  # retrieve the MDP: statedic maps state -> state index
    print("Nombre d'etats : ",len(statedic))  # number of states
    state, transitions = list(mdp.items())[0]
    print(state)  # one state of the MDP
    print(transitions)  # transition dict for that state: {action -> [proba, state, reward, done]}

    # Run with an agent that picks actions at random.
    agent = RandomAgent(env.action_space)

    # Write a log file over several episodes via the Monitor wrapper.
    outdir = 'gridworld-v0/random-agent-results'
    envm = wrappers.Monitor(env, directory=outdir, force=True, video_callable=False)
    # Load a specific map and its per-cell reward table
    # (keys are cell types, values their rewards — semantics defined by gridworld).
    env.setPlan("gridworldPlans/plan0.txt", {0: -0.001, 3: 1, 4: 1, 5: -1, 6: -1})
    env.seed()  # re-seed the pseudo-random generator (no argument: nondeterministic seed)
    episode_count = 1000
    reward = 0
    done = False
    rsum = 0  # cumulative reward of the current episode
    FPS = 0.0001  # render delay between frames
    for i in range(episode_count):
        obs = envm.reset()
        env.verbose = (i % 100 == 0 and i > 0)  # render 1 episode out of 100
        if env.verbose:
            env.render(FPS)
        j = 0  # number of actions taken this episode
        rsum = 0
        while True:
            action = agent.act(obs, reward, done)
            obs, reward, done, _ = envm.step(action)
            rsum += reward
            j += 1
            if env.verbose:
                env.render(FPS)
            if done:
                print("Episode : " + str(i) + " rsum=" + str(rsum) + ", " + str(j) + " actions")
                break

    print("done")
    env.close()