-
Notifications
You must be signed in to change notification settings - Fork 1
/
test.py
executable file
·91 lines (81 loc) · 3.33 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import argparse
import torch
from src.env import create_train_env, ACTION_MAPPING
from src.model import PPO, ActorCritic
import torch.nn.functional as F
def get_args():
    """Parse the command-line options of this evaluation script.

    Returns:
        argparse.Namespace with the attributes ``level`` (int),
        ``saved_path`` (str), ``method`` (str) and ``output_path`` (str).
    """
    # The first positional parameter of ArgumentParser is ``prog``; the text
    # below is a description, so it has to be passed by keyword. Otherwise the
    # whole sentence is printed as the program name in usage/error messages.
    parser = argparse.ArgumentParser(
        description="""Implementation of model described in the paper: Proximal Policy Optimization and A3C Algorithms for Contra Nes""")
    parser.add_argument("--level", type=int, default=1,
                        help="game level to evaluate; also selects the checkpoint file")
    parser.add_argument("--saved_path", type=str, default="trained_models",
                        help="directory containing the trained checkpoints")
    parser.add_argument("--method", type=str, default='PPO',
                        help="agent to evaluate: 'PPO' or 'A3C' (case-insensitive)")
    parser.add_argument("--output_path", type=str, default="output",
                        help="directory used to build the output video path")
    args = parser.parse_args()
    return args
def test_a3c(opt):
    """Run one greedy evaluation episode with a trained A3C (ActorCritic) model.

    Loads ``{saved_path}/a3c_contra_level_{level}_A3C``, then repeatedly picks
    the highest-probability action and steps the environment until
    ``info["level"]`` exceeds ``opt.level`` or the environment reports ``done``.

    Args:
        opt: parsed options; reads ``level``, ``saved_path`` and ``output_path``.
    """
    torch.manual_seed(123)
    # Second argument is presumably the path of the recorded video -- confirm
    # against create_train_env.
    env = create_train_env(opt.level, "{}/video_level_{}_A3C.mp4".format(opt.output_path, opt.level))
    num_states, num_actions = env.observation_space.shape[0], len(ACTION_MAPPING)
    model = ActorCritic(num_states, num_actions)

    use_cuda = torch.cuda.is_available()
    checkpoint_path = "{}/a3c_contra_level_{}_A3C".format(opt.saved_path, opt.level)
    if use_cuda:
        model.load_state_dict(torch.load(checkpoint_path))
        model.cuda()
    else:
        # Remap storages to the CPU so the checkpoint loads without a GPU.
        model.load_state_dict(torch.load(checkpoint_path, map_location=torch.device("cpu")))
    model.eval()

    state = torch.from_numpy(env.reset())
    done = True
    # Pure inference: disabling autograd avoids building a graph on every step.
    with torch.no_grad():
        while True:
            if done:
                # Fresh recurrent state at the start of an episode.
                h_0 = torch.zeros((1, 512), dtype=torch.float)
                c_0 = torch.zeros((1, 512), dtype=torch.float)
                # NOTE(review): the environment was already reset above and this
                # observation is discarded -- confirm the second reset is needed.
                env.reset()
            else:
                h_0 = h_0.detach()
                c_0 = c_0.detach()
            if use_cuda:
                h_0 = h_0.cuda()
                c_0 = c_0.cuda()
                state = state.cuda()

            logits, _, h_0, c_0 = model(state, h_0, c_0)
            policy = F.softmax(logits, dim=1)
            # Greedy action; .item() already yields a plain Python int.
            action = torch.argmax(policy).item()
            state, reward, done, info = env.step(action)
            state = torch.from_numpy(state)
            env.render()
            # NOTE(review): the message is also printed when the episode merely
            # ends (done) without the level counter advancing -- confirm intent.
            if info["level"] > opt.level or done:
                print("Level {} completed".format(opt.level))
                break
def test_ppo(opt):
    """Run one greedy evaluation episode with a trained PPO model.

    Loads ``{saved_path}/ppo_contra_level_{level}_PPO``, then repeatedly picks
    the highest-probability action and steps the environment until
    ``info["level"]`` exceeds ``opt.level`` or the environment reports ``done``.

    Args:
        opt: parsed options; reads ``level``, ``saved_path`` and ``output_path``.
    """
    torch.manual_seed(123)
    # Second argument is presumably the path of the recorded video -- confirm
    # against create_train_env.
    env = create_train_env(opt.level, "{}/video_level_{}_PPO.mp4".format(opt.output_path, opt.level))
    model = PPO(env.observation_space.shape[0], len(ACTION_MAPPING))

    use_cuda = torch.cuda.is_available()
    checkpoint_path = "{}/ppo_contra_level_{}_PPO".format(opt.saved_path, opt.level)
    if use_cuda:
        model.load_state_dict(torch.load(checkpoint_path))
        model.cuda()
    else:
        # Remap storages to the CPU so the checkpoint loads without a GPU.
        model.load_state_dict(torch.load(checkpoint_path, map_location=torch.device("cpu")))
    model.eval()

    state = torch.from_numpy(env.reset())
    # Pure inference: disabling autograd avoids building a graph on every step.
    with torch.no_grad():
        while True:
            if use_cuda:
                state = state.cuda()
            logits, _ = model(state)
            policy = F.softmax(logits, dim=1)
            # Greedy action: index of the most probable entry.
            action = torch.argmax(policy).item()
            state, reward, done, info = env.step(action)
            state = torch.from_numpy(state)
            env.render()
            # NOTE(review): the message is also printed when the episode merely
            # ends (done) without the level counter advancing -- confirm intent.
            if info["level"] > opt.level or done:
                print("Level {} completed".format(opt.level))
                break
if __name__ == "__main__":
    # Script entry point: dispatch to the evaluation routine selected by
    # --method (matched case-insensitively).
    opt = get_args()
    method = opt.method.lower()
    if method == 'ppo':
        test_ppo(opt)
    elif method == 'a3c':
        test_a3c(opt)
    else:
        # `assert "<non-empty string>"` is always true and never fails, so an
        # unknown method used to exit silently; raise explicitly instead.
        raise ValueError("Wrong method, please try again!")