-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathai.py
94 lines (73 loc) · 3.1 KB
/
ai.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import gym
import numpy as np
from agent_gym import AGENT_GYM
import config, utils
import matplotTest as draw
import mapGenerator as MG
import copy
from whca import WHCA
# Earlier hand-written small-map configuration, kept for reference:
#   source_pos = [[0,0]]
#   hole_pos   = [[4,4],[0,4],[4,0]]
#   agent_num  = 20
#   hole_city  = [0,1,2]
#   city_dis   = [0.33, 0.33, 0.34]

# Upper bound on the number of steps simulated per episode.
total_time = 1000

# Source and hole positions are produced by the map generator from the
# configured map height and width.
source_pos, hole_pos = MG.bigMap(config.Map.Height, config.Map.Width)

# randomcity() has known problems, so hole_city and city_dis are
# written out by hand instead of being generated.
# NOTE(review): hole_city presumably assigns a city id to each hole and
# city_dis presumably gives one weight per city id -- confirm against
# AGENT_GYM, which receives both.
hole_city = [0, 1, 2, 2, 1, 3]
city_dis = [0.12, 0.33, 0.37, 0.18]

# Palette handed to draw.draw_map. Slot 4 is overwritten with a fixed
# light grey.
# NOTE(review): this relies on randomcolor(4) returning a container that
# accepts index 4 (a plain 4-element list would raise IndexError) -- confirm.
color = draw.randomcolor(4)
color[4] = [0.9, 0.9, 0.9]

# First constructor argument of the WHCA planner.
window = 10
# When True, every step also writes frames through draw.draw_map: an
# in-between frame numbered 2*t - 1 (skipped at t == 0) and the actual frame
# numbered 2*t. Disabled by default, matching the original script where this
# code was switched off by wrapping it in a string literal.
# NOTE(review): the nesting of the frame-dump statements was reconstructed
# from a source whose indentation had been lost -- confirm before enabling.
SAVE_FRAMES = False

# Sweep the number of agents from 1 to 49. For each size, run one episode of
# at most total_time steps driven by the WHCA planner, then print
# "<agent_num>\t<sum of agent rewards>".
for agent_num in range(1, 50):
    env = AGENT_GYM(source_pos, hole_pos, agent_num, total_time, hole_city, city_dis)
    # Same seed for every agent count.
    env.seed(config.Game.Seed)

    observation = env.reset()
    # The observation is a 5-element sequence, unpacked positionally.
    [agent_pos, agent_city, agent_reward, hole_reward, source_reward] = observation

    whca = WHCA(window, source_pos, hole_pos, hole_city, agent_num)

    # Previous-step snapshots; only read by the SAVE_FRAMES branch below.
    agent_city_old = copy.deepcopy(agent_city)
    agent_pos_old = copy.deepcopy(agent_pos)

    # `t` (formerly `time`) avoids sharing a name with the stdlib module.
    for t in range(total_time):
        env.render()

        # Alternative random policy, kept for reference.
        # NOTE: np.random.random_integers is deprecated in NumPy;
        # np.random.randint(0, 5) draws from the same inclusive 0..4 range.
        # action = []
        # for i in range(agent_num):
        #     r = np.random.random_integers(0, 4)
        #     if r < 4:
        #         action.append(utils.dirs[r])
        #     else:
        #         action.append([0,0])

        # WHCA policy: one joint action for all agents at this step.
        action = whca.getJointAction(agent_pos, agent_city, t)

        # Only the observation and the done flag are used.
        observation, _reward, done, _info = env.step(action)
        [agent_pos, agent_city, agent_reward, hole_reward, source_reward] = observation

        if SAVE_FRAMES:
            # Turn the previous positions into the midpoint between the
            # previous and the current positions (in place).
            for i, pos in enumerate(agent_pos):
                agent_pos_old[i][0] = (pos[0] + agent_pos_old[i][0]) / 2.0
                agent_pos_old[i][1] = (pos[1] + agent_pos_old[i][1]) / 2.0
            # In-between frame; there is no previous step at t == 0.
            if t != 0:
                draw.draw_map([config.Map.Width, config.Map.Height], source_pos, hole_pos, hole_city,
                              agent_pos_old, agent_city_old, color, "mapOutput", "show", 2 * t - 1,
                              agent_reward, hole_reward, source_reward, city_dis)
            # Frame for the actual state after this step.
            draw.draw_map([config.Map.Width, config.Map.Height], source_pos, hole_pos, hole_city,
                          agent_pos, agent_city, color, "mapOutput", "show", 2 * t,
                          agent_reward, hole_reward, source_reward, city_dis)
            # Remember the current state as "previous" for the next step.
            for i, pos in enumerate(agent_pos):
                agent_city_old[i] = agent_city[i]
                agent_pos_old[i][0] = pos[0]
                agent_pos_old[i][1] = pos[1]

        if done:
            break

    # Single parenthesised argument: prints the same text on Python 2 and 3.
    print(str(agent_num) + '\t' + str(sum(agent_reward)))

# Optional video export of the dumped frames (requires SAVE_FRAMES output):
# draw.save_video("mapOutput", "show", total_time)   # avi with cv
# draw.save_video2("mapOutput", "show", total_time)  # gif with imageio
# draw.save_video3("mapOutput", "show", total_time)  # mp4 with imageio