random_policy.py
60 lines (48 loc) · 1.95 KB
from gym.spaces import Box
import numpy as np
import random

from ray.rllib.policy.policy import Policy
from ray.rllib.utils.annotations import override


class RandomPolicy(Policy):
    """Hand-coded policy that returns random actions."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Whether for compute_actions, the bounds given in action_space
        # should be ignored (default: False). This is to test action-clipping
        # and any Env's reaction to bounds breaches.
        if self.config.get("ignore_action_bounds", False) and \
                isinstance(self.action_space, Box):
            self.action_space_for_sampling = Box(
                -float("inf"),
                float("inf"),
                shape=self.action_space.shape,
                dtype=self.action_space.dtype)
        else:
            self.action_space_for_sampling = self.action_space

    @override(Policy)
    def compute_actions(self,
                        obs_batch,
                        state_batches=None,
                        prev_action_batch=None,
                        prev_reward_batch=None,
                        **kwargs):
        # Sample one random action per observation in the batch.
        # Alternatively, a numpy array would work here as well,
        # e.g.: np.array([random.choice([0, 1])] * len(obs_batch))
        return [self.action_space_for_sampling.sample()
                for _ in obs_batch], [], {}

    @override(Policy)
    def learn_on_batch(self, samples):
        """No learning."""
        return {}

    @override(Policy)
    def compute_log_likelihoods(self,
                                actions,
                                obs_batch,
                                state_batches=None,
                                prev_action_batch=None,
                                prev_reward_batch=None):
        # Return a dummy (random) log-likelihood per observation.
        return np.array([random.random()] * len(obs_batch))

    def get_weights(self):
        # A random policy has no weights to return.
        pass

    def set_weights(self, weights):
        # A random policy has no weights to set.
        pass
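
Below is a minimal sketch of exercising this policy standalone, outside of an RLlib Trainer or rollout worker. The observation/action spaces, the dummy batch, and the commented-out import are illustrative assumptions, not part of the file above; it only relies on the standard Policy constructor signature (observation_space, action_space, config) and the methods defined in this file.

# Hypothetical usage sketch -- spaces and batch sizes are made up for illustration.
from gym.spaces import Box, Discrete
import numpy as np

# from random_policy import RandomPolicy  # assuming this file is importable

obs_space = Box(-1.0, 1.0, shape=(4,), dtype=np.float32)
act_space = Discrete(2)

policy = RandomPolicy(obs_space, act_space, {"ignore_action_bounds": False})

# A batch of 3 dummy observations; only its length matters to this policy.
obs_batch = [obs_space.sample() for _ in range(3)]
actions, state_outs, extra_fetches = policy.compute_actions(obs_batch)

print(actions)                       # e.g. [1, 0, 1] -- one random action per observation
print(policy.learn_on_batch(None))   # {} -- no learning happens

Setting "ignore_action_bounds": True in the config would (for Box action spaces) sample from an unbounded copy of the space, which is useful for testing how action clipping and the environment react to out-of-bounds actions, as the constructor comment describes.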