-
Notifications
You must be signed in to change notification settings - Fork 2
/
server.py
77 lines (64 loc) · 2.12 KB
/
server.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import socket
import struct
import pickle
import numpy as np
import gym
class Connection:
    """Exchange pickled Python objects over a connected stream socket.

    Every message on the wire is a 4-byte little-endian unsigned length
    followed by exactly that many bytes of pickled payload.
    """

    # Frame header: payload length as a little-endian unsigned 32-bit integer.
    _HEADER = struct.Struct("<L")
    # Upper bound on bytes requested per recv() call. Any bytes read past the
    # end of the current frame stay in the buffer for the next call.
    _RECV_SIZE = 4096

    def __init__(self, s):
        """Wrap the already-connected socket ``s``."""
        self._socket = s
        # Bytes received from the socket but not yet consumed as a frame.
        self._buffer = bytearray()

    def receive_object(self):
        """Block until one complete frame has arrived and return its object.

        Returns:
            The unpickled payload, or ``None`` if the peer closed the
            connection before a complete frame was received.
        """
        header_size = self._HEADER.size
        while True:
            if len(self._buffer) >= header_size:
                (length,) = self._HEADER.unpack_from(self._buffer)
                frame_end = header_size + length
                if len(self._buffer) >= frame_end:
                    break
            new_bytes = self._socket.recv(self._RECV_SIZE)
            if not new_bytes:
                # recv() returning no data means the peer closed the stream.
                return None
            self._buffer += new_bytes

        # SECURITY: pickle.loads can execute arbitrary code while decoding.
        # Only use this class with a peer that is fully trusted.
        obj = pickle.loads(self._buffer[header_size:frame_end])
        # Drop the consumed frame only after a successful decode, keeping any
        # bytes that belong to the next frame.
        del self._buffer[:frame_end]
        return obj

    def send_object(self, d):
        """Pickle ``d`` and send it as a single length-prefixed frame."""
        # Protocol 2 is pinned by the original code; presumably the peer may
        # be an older Python -- confirm before raising it.
        body = pickle.dumps(d, protocol=2)
        # sendall() keeps writing until the whole frame is out; a bare send()
        # may transmit only part of it and desynchronise the stream.
        self._socket.sendall(self._HEADER.pack(len(body)) + body)
class Env(gym.Env):
    """Gym environment whose dynamics are provided by a peer over TCP.

    The constructor listens on ``addr`` and blocks until one peer connects.
    Afterwards ``reset``/``step``/``close`` are forwarded to that peer as
    pickled messages, and the peer's replies are dictionaries with the keys
    ``"observation"``, ``"reward"``, ``"done"`` and ``"info"``.
    """

    def __init__(self, addr):
        """Listen on ``addr`` (a ``(host, port)`` pair) and accept one peer.

        This call blocks until a peer has connected.
        """
        # Only a single peer is ever served, so the listening socket is
        # released as soon as that peer has been accepted.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as listener:
            listener.bind(addr)
            listener.listen(1)
            clientsocket, _peer_address = listener.accept()
        self._socket = clientsocket
        self._conn = Connection(clientsocket)
        # Both spaces are unknown until the first reset() reply arrives.
        self.action_space = None
        self.observation_space = None

    def _request(self, payload):
        """Send ``payload`` to the peer and return its reply.

        Raises:
            ConnectionError: if the peer closed the connection instead of
                replying.
        """
        self._conn.send_object(payload)
        msg = self._conn.receive_object()
        if msg is None:
            raise ConnectionError("remote environment closed the connection")
        return msg

    def reset(self):
        """Ask the peer to reset and return the initial observation.

        Also (re)builds ``action_space`` and ``observation_space`` from the
        expressions the peer sends in ``msg["info"]``.
        """
        msg = self._request("reset")
        # SECURITY: eval() executes whatever expression the peer sends, so
        # the peer must be trusted. The expressions are evaluated in this
        # module's namespace and can only use names defined or imported here.
        self.action_space = eval(msg["info"]["action_space"])
        self.observation_space = eval(msg["info"]["observation_space"])
        return msg["observation"]

    def step(self, action):
        """Send ``action`` to the peer and return its transition.

        Args:
            action: a NumPy array, NumPy scalar, or plain Python number or
                sequence; it is converted to built-in Python types before
                being sent.

        Returns:
            The tuple ``(observation, reward, done, info)`` taken from the
            peer's reply.
        """
        # np.asarray() lets plain ints and lists through as well as arrays,
        # and tolist() strips NumPy types so the peer does not need NumPy to
        # unpickle the action.
        msg = self._request(np.asarray(action).tolist())
        return msg["observation"], msg["reward"], msg["done"], msg["info"]

    def close(self):
        """Tell the peer to shut down and close the client socket."""
        try:
            self._conn.send_object("close")
        finally:
            # Release the socket even if the peer is already gone and the
            # notification could not be delivered.
            self._socket.close()
# Demo driver: wait for a peer on localhost, then run ten random-action steps
# against the remote environment, printing every transition.
# NOTE: this runs at module level (there is no __main__ guard), so executing
# or importing the file blocks until a peer connects.
addr = ("127.0.0.1", 50710)
env = Env(addr)
try:
    obs = env.reset()
    print(obs, env.action_space, env.observation_space)
    for i in range(10):
        a = env.action_space.sample()
        obs, rwd, done, info = env.step(a)
        print(i, obs, rwd, done, info)
        if done:
            print("resetting")
            obs = env.reset()
finally:
    # Always notify the peer and release the socket, even if a step failed.
    env.close()