# environment_basic.py — grid-world RL environment (202 lines, 159 loc, 6.02 KB)
import os
import time
import numpy as np
import random
import copy
np.random.seed(1)
class Env:
    """A small grid-world environment for reinforcement-learning experiments.

    The agent starts at [0, 0] on a 5x6 grid (rows x columns), earns a
    one-time bonus for stepping onto a type-0 object, and finishes the
    episode by then reaching the last column.  Reward values come from the
    required ``env_var`` keyword argument:

        step_deduction:    penalty applied when a step earns nothing
        objects_point_add: bonus for collecting the type-0 object
        end_point_add:     bonus for reaching the goal column afterwards
    """

    def __init__(self, render_speed=0.01, *args, **kwargs):
        self.render_speed = render_speed
        self.action_space = ['u', 'd', 'l', 'r']
        self.action_size = len(self.action_space)
        self.agent = {"pos": [0, 0]}
        # Environment configuration.
        self.map_size = [5, 6]   # [rows, columns]
        self.type_count = 2      # number of distinct object types
        self.base_map = self.create_base_map()
        self.counter = 0         # total steps taken across all episodes
        self.rewards = []
        self.objects = []        # [{"pos": [y, x], "type": t}, ...]
        self.goal = []
        # Reward configuration (raises KeyError if not supplied).
        self.env_var = kwargs['env_var']
        # Set once the type-0 object has been collected this episode.
        self.checkPoint = False

    def check_if_reward(self):
        """Evaluate the agent's current position and compute this step's reward.

        Returns:
            dict with 'rewards' (the step reward) and 'if_goal'
            (True when the episode is finished).
        """
        check_list = dict()
        check_list['if_goal'] = False
        rewards = 0
        for obj in self.objects:
            if obj['pos'] == self.agent['pos'] and obj['type'] == 0:
                if not self.checkPoint:
                    # The pickup bonus is granted only once per episode.
                    rewards += self.env_var['objects_point_add']
                    self.checkPoint = True
        # Episode ends when the checkpoint has been collected and the agent
        # reaches the last column (x == 5 on the 5x6 map).
        if self.checkPoint and self.agent['pos'][1] == 5:
            rewards += self.env_var['end_point_add']
            check_list['if_goal'] = True
        if rewards == 0:
            # Nothing earned this step: apply the step penalty.
            rewards = self.env_var['step_deduction']
        check_list['rewards'] = rewards
        return check_list

    def place_objects(self, count, work_range, except_position=None,
                      self_place=None, types_count=None):
        """Place objects on the grid, randomly and/or manually.

        Args:
            count: number of objects to draw at random (clamped to the
                number of free cells).
            work_range: [rows, cols] area in which random placement occurs.
            except_position: [y, x] cells excluded from the random draw.
            self_place: manually placed entries appended to the result as-is
                (their cells are also excluded from the draw).
            types_count: per-type quotas, in order, for every type except the
                last; once the quotas are exhausted the remaining objects get
                the last type.

        Returns:
            list of {"pos": [y, x], "type": t} dicts,
            e.g. [{"pos": [0, 1], "type": 0}, {"pos": [2, 3], "type": 1}].
        """
        # Avoid mutable default arguments (shared between calls).
        except_position = except_position if except_position is not None else []
        self_place = self_place if self_place is not None else []
        types_count = types_count if types_count is not None else []
        rows, cols = work_range
        result = []
        # Each cell is keyed 1..rows*cols so it can be sampled without
        # replacement.
        # NOTE(review): the original encoded cells with stride work_range[0]
        # but bounds-checked columns against work_range[1], which only agrees
        # on square ranges.  Keys are now consistently cols*y + x in both
        # directions (identical behavior for the square [5, 5] range used by
        # reset()).
        buffer = [i + 1 for i in range(rows * cols)]
        # Manual placements are kept verbatim.
        if len(self_place):
            result += self_place
        # Remove manual/excluded cells (that fall inside the range) from
        # the draw pool.
        for pos in (self_place + except_position):
            if not (pos[0] + 1 > rows or pos[1] + 1 > cols):
                buffer.remove(cols * pos[0] + pos[1] + 1)
        # Clamp the draw size to the remaining pool.
        if len(buffer) < count:
            count = len(buffer)
        result_buffer = random.sample(buffer, count)
        # type_que = [current type index, count assigned so far for it].
        type_que = [0, 0]
        for key in result_buffer:
            if type_que[0] + 1 != self.type_count:
                type_temp = type_que[0]
                type_que[1] += 1
                if types_count[type_que[0]] == type_que[1]:
                    # Quota reached: advance to the next type.
                    type_que[0] += 1
                    type_que[1] = 0
            else:
                # All earlier quotas filled: the rest get the last type.
                type_temp = self.type_count - 1
            result.append({
                "pos": [(key - 1) // cols, (key - 1) % cols],
                "type": type_temp
            })
        return result

    def process_env(self):
        """Per-step environment dynamics hook (e.g. moving objects).

        Currently a no-op; called once at the start of every step().
        """
        pass

    def reset(self):
        """Start a new episode and return the initial state."""
        self.agent = {"pos": [0, 0]}
        self.checkPoint = False
        # Reward cells are drawn without replacement from a 5x5 sub-area;
        # roughly half of the drawn objects are type 0 (the collectible one).
        objects_count = random.randint(1, 25)
        self.objects = self.place_objects(
            objects_count, [5, 5], except_position=[], self_place=[],
            types_count=[objects_count // 2]
        )
        return self.get_state()

    def step(self, action):
        """Apply one action and return ``(next_state, reward, done)``."""
        self.counter += 1
        self.process_env()
        self.agent['pos'] = self.move_agent(self.agent['pos'], action)
        check = self.check_if_reward()
        done = check['if_goal']
        reward = check['rewards']
        s_ = self.get_state()
        return s_, reward, done

    def create_base_map(self):
        """Build the zeroed state template ``[agent_pos, objects_map]``.

        ``objects_map`` is a map_size[0] x map_size[1] grid where each cell
        holds a zero list of length type_count (one slot per object type).
        """
        state_map = []
        # Normalized agent position placeholder: [y/(rows-1), x/(cols-1)].
        state_map.append([0, 0])
        # Zeroed per-cell type indicators.
        state_map.append([])
        for _row in range(self.map_size[0]):
            buffer = []
            for _col in range(self.map_size[1]):
                # Fresh list per cell; '* type_count' on ints is safe here.
                buffer.append([0] * self.type_count)
            state_map[1].append(buffer)
        return state_map

    def render(self):
        """Return the current state: normalized agent position + object map."""
        state_map = copy.deepcopy(self.base_map)
        # Normalize the agent position into [0, 1] on each axis.
        state_map[0][0] = self.agent['pos'][0] / (self.map_size[0] - 1)
        state_map[0][1] = self.agent['pos'][1] / (self.map_size[1] - 1)
        # Mark each object's type channel in its cell.
        for obj in self.objects:
            y = obj['pos'][0]  # row
            x = obj['pos'][1]  # column
            state_map[1][y][x][obj['type']] = 1
        return state_map

    def get_state(self):
        """Return the observation handed to the agent (currently render())."""
        # NOTE: positions are baked into the rendered map; extend here if the
        # observation ever diverges from the rendered map.
        states = self.render()
        return states

    def move_agent(self, pos, action):
        """Move ``pos = [y, x]`` one cell per ``action``, clamped to the map.

        Actions: 0 = up, 1 = down, 2 = right, 3 = left.
        NOTE(review): this ordering disagrees with action_space
        ['u', 'd', 'l', 'r'] (which suggests 2 = left) — confirm which one
        callers rely on before changing either.
        """
        if action == 0:    # up
            if pos[0] > 0:
                pos[0] -= 1
        elif action == 1:  # down
            if pos[0] < self.map_size[0] - 1:
                pos[0] += 1
        elif action == 2:  # right
            if pos[1] < self.map_size[1] - 1:
                pos[1] += 1
        elif action == 3:  # left
            if pos[1] > 0:
                pos[1] -= 1
        return pos

    def visualization(self, state):
        """Print the object map as a grid of ``[type0, type1]`` cells."""
        for i in state[1]:
            for j in i:
                print("[{}, {}]".format(j[0], j[1]), end=" ")
            print()
if __name__ == "__main__":
test_env = Env(env_var={
"step_deduction": -0.0001,
"objects_point_add": 1,
"end_point_add": 1
})
test_env.reset(