-
Notifications
You must be signed in to change notification settings - Fork 71
/
param_noise.py
51 lines (41 loc) · 1.62 KB
/
param_noise.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import numpy as np
import torch
from math import sqrt
"""
From OpenAI Baselines:
https://github.com/openai/baselines/blob/master/baselines/ddpg/noise.py
"""
class AdaptiveParamNoiseSpec(object):
    """Bookkeeping for an adaptive parameter-noise standard deviation.

    The instance stores a current parameter-noise stddev and nudges it up or
    down by a fixed multiplicative factor each time ``adapt`` is called,
    depending on how a measured action-space distance compares with the
    desired action-space stddev.
    """

    def __init__(self, initial_stddev=0.1, desired_action_stddev=0.2, adaptation_coefficient=1.01):
        """Store the configuration and start the current stddev at its initial value.

        ``initial_stddev`` (and the ``current_stddev`` derived from it) is a
        standard deviation in *parameter* space, whereas
        ``desired_action_stddev`` is, as the name says, the target standard
        deviation in *action* space.
        """
        # Configuration, kept so it can be reported by __repr__.
        self.initial_stddev = initial_stddev
        self.desired_action_stddev = desired_action_stddev
        self.adaptation_coefficient = adaptation_coefficient

        # Mutable state updated by adapt().
        self.current_stddev = initial_stddev

    def adapt(self, distance):
        """Rescale ``current_stddev`` by one adaptation step.

        If ``distance`` is strictly greater than ``desired_action_stddev`` the
        current stddev is divided by ``adaptation_coefficient``; in every other
        case (including equality) it is multiplied by it.
        """
        if distance > self.desired_action_stddev:
            # Measured distance is above the target: shrink the noise.
            self.current_stddev /= self.adaptation_coefficient
            return

        # Measured distance is at or below the target: grow the noise.
        self.current_stddev *= self.adaptation_coefficient

    def get_stats(self):
        """Return a dict holding the current parameter-noise stddev."""
        return {'param_noise_stddev': self.current_stddev}

    def __repr__(self):
        """Return a constructor-like string listing the three configuration values."""
        template = 'AdaptiveParamNoiseSpec(initial_stddev={}, desired_action_stddev={}, adaptation_coefficient={})'
        config = (
            self.initial_stddev,
            self.desired_action_stddev,
            self.adaptation_coefficient,
        )
        return template.format(*config)
def ddpg_distance_metric(actions1, actions2):
    """Compute the "distance" between actions taken by two policies at the same states.

    The distance is the root of the mean squared difference: squared
    differences are averaged along axis 0 first, then the resulting
    per-column means are averaged again, and the square root of that value is
    returned.

    Args:
        actions1: Actions from the first policy. NumPy arrays are the expected
            input; anything ``np.asarray`` can convert (nested lists, plain
            scalars) is also accepted.
        actions2: Actions from the second policy, broadcast-compatible with
            ``actions1``.

    Returns:
        The distance as a Python ``float``.
    """
    # Coerce to arrays so plain Python sequences support element-wise
    # subtraction; ndarray inputs pass through unchanged.
    diff = np.asarray(actions1) - np.asarray(actions2)
    # Promote 0-d results to 1-d so the axis-0 reduction below is always valid.
    diff = np.atleast_1d(diff)
    mean_diff = np.mean(np.square(diff), axis=0)
    dist = sqrt(np.mean(mean_diff))
    return dist