From b4597a734cfe77576cd558373242a45d57165735 Mon Sep 17 00:00:00 2001 From: Pedro Freire Date: Fri, 26 Jun 2020 17:33:30 -0700 Subject: [PATCH 1/8] Start adding NoisyObs --- src/seals/diagnostics/__init__.py | 6 +++ src/seals/diagnostics/noisy_obs.py | 64 ++++++++++++++++++++++++++++++ src/seals/util.py | 35 +++++++++++++++- 3 files changed, 104 insertions(+), 1 deletion(-) create mode 100644 src/seals/diagnostics/noisy_obs.py diff --git a/src/seals/diagnostics/__init__.py b/src/seals/diagnostics/__init__.py index e894a53..e61b8c6 100644 --- a/src/seals/diagnostics/__init__.py +++ b/src/seals/diagnostics/__init__.py @@ -7,3 +7,9 @@ entry_point="seals.diagnostics.risky_path:RiskyPathEnv", max_episode_steps=5, ) + +gym.register( + id="seals/NoisyObs-v0", + entry_point="seals.diagnostics.noisy_obs:NoisyObsEnv", + max_episode_steps=15, +) diff --git a/src/seals/diagnostics/noisy_obs.py b/src/seals/diagnostics/noisy_obs.py new file mode 100644 index 0000000..f4e50b0 --- /dev/null +++ b/src/seals/diagnostics/noisy_obs.py @@ -0,0 +1,64 @@ +"""Environment testing for robustness to noise.""" + +from gym import spaces +import numpy as np + +from seals import base_envs, util + + +class NoisyObsEnv(base_envs.ResettablePOMDP): + """Simple gridworld with noisy observations. + + The agent randomly starts at one of the corners of an MxM grid and + tries to reach and stay at the center. The observation consists of the + agent's (x,y) coordinates and L "distractor" samples of Gaussian noise. + The challenge is to select the relevant features in the observations, and + not overfit to noise. + """ + def __init__(self, *, size:int = 5, noise_length:int = 20): + """Build environment. + + Args: + size: width and height of gridworld. + noise_length: dimension of noise vector in observation. 
+ """ + self._size = size + self._noise_length = noise_length + self._goal = np.array([self._size // 2, self._size // 2]) + + self._observation_space = spaces.Box( + low=np.concatenate(([0, 0], np.full(self._noise_length, -np.inf),)), + high=np.concatenate( + ([size - 1, size - 1], np.full(self._noise_length, np.inf),) + ), + dtype=float, + ) + + super().__init__( + state_space=spaces.MultiDiscrete([size, size]), + action_space=spaces.Discrete(5), + ) + + def terminal(self, state: int) -> bool: + return False + + def initial_state(self) -> int: + n = self._size + corners = np.array([[0, 0], [n - 1, 0], [0, n - 1], [n - 1, n - 1]]) + return corners[np.random.randint(4)] + + def reward(self, state: int, action: int, new_state: int) -> float: + return np.allclose(state, self.goal) + + def transition(self, state: int, action: int) -> int: + return util.grid_transition_fn( + state, action, x_bounds=(0, self._size - 1), y_bounds=(0, self._size - 1) + ) + + @property + def observation_space(self): + return self._observation_space + + def ob_from_state(self, state): + noise_vector = self.np_random.randn(self._noise_length) + return np.concatenate([state, noise_vector]) diff --git a/src/seals/util.py b/src/seals/util.py index 19f364f..89eaade 100644 --- a/src/seals/util.py +++ b/src/seals/util.py @@ -1,6 +1,6 @@ """Miscellaneous utilities.""" -from typing import Optional +from typing import Optional, Tuple import gym import numpy as np @@ -118,3 +118,36 @@ def sample_distribution( def one_hot_encoding(pos: int, size: int) -> np.ndarray: """Returns a 1-D hot encoding of a given position and size.""" return np.eye(size)[pos] + + +def grid_transition_fn( + state: int, action: int, x_bounds: Tuple[float, float] =(-np.inf, np.inf), y_bounds: Tuple[float, float] = (-np.inf, np.inf), +): + """Returns transition of a deterministic gridworld. + + Agent is bounded in the region limited by x_bounds and y_bounds, + ends inclusive. 
+ + Actions: + 0: Right + 1: Down + 2: Left + 3: Up + 4: Stay put + """ + dirs = [ + (1, 0), + (0, 1), + (-1, 0), + (0, -1), + (0, 0), + ] + + x, y = state + dx, dy = dirs[action] + + next_x = np.clip(x + dx, *x_bounds) + next_y = np.clip(y + dy, *y_bounds) + next_state = np.array([next_x, next_y], dtype=state.dtype) + + return next_state From 75a0ea44a53dc5e97c2f9be2b37b90f6506c56d6 Mon Sep 17 00:00:00 2001 From: Pedro Freire Date: Sat, 27 Jun 2020 08:45:39 -0700 Subject: [PATCH 2/8] Fix NoisyObs details. --- src/seals/diagnostics/noisy_obs.py | 41 +++++++++++++++--------------- src/seals/util.py | 5 +++- 2 files changed, 25 insertions(+), 21 deletions(-) diff --git a/src/seals/diagnostics/noisy_obs.py b/src/seals/diagnostics/noisy_obs.py index f4e50b0..4b3e973 100644 --- a/src/seals/diagnostics/noisy_obs.py +++ b/src/seals/diagnostics/noisy_obs.py @@ -15,7 +15,8 @@ class NoisyObsEnv(base_envs.ResettablePOMDP): The challenge is to select the relevant features in the observations, and not overfit to noise. """ - def __init__(self, *, size:int = 5, noise_length:int = 20): + + def __init__(self, *, size: int = 5, noise_length: int = 20): """Build environment. 
Args: @@ -26,39 +27,39 @@ def __init__(self, *, size:int = 5, noise_length:int = 20): self._noise_length = noise_length self._goal = np.array([self._size // 2, self._size // 2]) - self._observation_space = spaces.Box( - low=np.concatenate(([0, 0], np.full(self._noise_length, -np.inf),)), - high=np.concatenate( - ([size - 1, size - 1], np.full(self._noise_length, np.inf),) - ), - dtype=float, - ) - super().__init__( state_space=spaces.MultiDiscrete([size, size]), action_space=spaces.Discrete(5), + observation_space=spaces.Box( + low=np.concatenate(([0, 0], np.full(self._noise_length, -np.inf),)), + high=np.concatenate( + ([size - 1, size - 1], np.full(self._noise_length, np.inf),) + ), + dtype=float, + ), ) - def terminal(self, state: int) -> bool: + def terminal(self, state: np.ndarray, n_actions_taken: int) -> bool: + """Always returns False.""" return False - def initial_state(self) -> int: + def initial_state(self) -> np.ndarray: + """Returns one of the grid's corners.""" n = self._size corners = np.array([[0, 0], [n - 1, 0], [0, n - 1], [n - 1, n - 1]]) return corners[np.random.randint(4)] - def reward(self, state: int, action: int, new_state: int) -> float: - return np.allclose(state, self.goal) + def reward(self, state: np.ndarray, action: int, new_state: np.ndarray) -> float: + """Returns positive reward if state is the goal.""" + return np.allclose(state, self._goal) - def transition(self, state: int, action: int) -> int: + def transition(self, state: np.ndarray, action: int) -> np.ndarray: + """Returns next state according to grid.""" return util.grid_transition_fn( state, action, x_bounds=(0, self._size - 1), y_bounds=(0, self._size - 1) ) - @property - def observation_space(self): - return self._observation_space - - def ob_from_state(self, state): - noise_vector = self.np_random.randn(self._noise_length) + def obs_from_state(self, state: np.ndarray) -> np.ndarray: + """Returns (x, y) concatenated with Gaussian noise.""" + noise_vector = 
self.rand_state.randn(self._noise_length) return np.concatenate([state, noise_vector]) diff --git a/src/seals/util.py b/src/seals/util.py index 89eaade..7c7663b 100644 --- a/src/seals/util.py +++ b/src/seals/util.py @@ -121,7 +121,10 @@ def one_hot_encoding(pos: int, size: int) -> np.ndarray: def grid_transition_fn( - state: int, action: int, x_bounds: Tuple[float, float] =(-np.inf, np.inf), y_bounds: Tuple[float, float] = (-np.inf, np.inf), + state: np.ndarray, + action: int, + x_bounds: Tuple[float, float] = (-np.inf, np.inf), + y_bounds: Tuple[float, float] = (-np.inf, np.inf), ): """Returns transition of a deterministic gridworld. From 8bfb7e63bc45fe0e43bd2365ccbda1029e397b60 Mon Sep 17 00:00:00 2001 From: Pedro Freire Date: Sat, 27 Jun 2020 11:03:22 -0700 Subject: [PATCH 3/8] Fix commas. --- src/seals/diagnostics/noisy_obs.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/seals/diagnostics/noisy_obs.py b/src/seals/diagnostics/noisy_obs.py index 4b3e973..fde210c 100644 --- a/src/seals/diagnostics/noisy_obs.py +++ b/src/seals/diagnostics/noisy_obs.py @@ -31,9 +31,9 @@ def __init__(self, *, size: int = 5, noise_length: int = 20): state_space=spaces.MultiDiscrete([size, size]), action_space=spaces.Discrete(5), observation_space=spaces.Box( - low=np.concatenate(([0, 0], np.full(self._noise_length, -np.inf),)), + low=np.concatenate(([0, 0], np.full(self._noise_length, -np.inf))), high=np.concatenate( - ([size - 1, size - 1], np.full(self._noise_length, np.inf),) + ([size - 1, size - 1], np.full(self._noise_length, np.inf)), ), dtype=float, ), @@ -56,7 +56,7 @@ def reward(self, state: np.ndarray, action: int, new_state: np.ndarray) -> float def transition(self, state: np.ndarray, action: int) -> np.ndarray: """Returns next state according to grid.""" return util.grid_transition_fn( - state, action, x_bounds=(0, self._size - 1), y_bounds=(0, self._size - 1) + state, action, x_bounds=(0, self._size - 1), y_bounds=(0, self._size - 1), ) def 
obs_from_state(self, state: np.ndarray) -> np.ndarray: From 356c7f1b0ed8e0181809ef8cf76e079ad0db2a21 Mon Sep 17 00:00:00 2001 From: Pedro Freire Date: Sat, 27 Jun 2020 11:10:12 -0700 Subject: [PATCH 4/8] Fix NoisyObsEnv randomness --- src/seals/diagnostics/noisy_obs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/seals/diagnostics/noisy_obs.py b/src/seals/diagnostics/noisy_obs.py index fde210c..fa277a2 100644 --- a/src/seals/diagnostics/noisy_obs.py +++ b/src/seals/diagnostics/noisy_obs.py @@ -47,7 +47,7 @@ def initial_state(self) -> np.ndarray: """Returns one of the grid's corners.""" n = self._size corners = np.array([[0, 0], [n - 1, 0], [0, n - 1], [n - 1, n - 1]]) - return corners[np.random.randint(4)] + return corners[np.rand_state.randint(4)] def reward(self, state: np.ndarray, action: int, new_state: np.ndarray) -> float: """Returns positive reward if state is the goal.""" From ce6a7e18db7080030edc0e641ea4506223059024 Mon Sep 17 00:00:00 2001 From: Pedro Freire Date: Sat, 27 Jun 2020 11:34:30 -0700 Subject: [PATCH 5/8] Fix NoisyObsEnv random state --- src/seals/diagnostics/noisy_obs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/seals/diagnostics/noisy_obs.py b/src/seals/diagnostics/noisy_obs.py index fa277a2..2e5cbfd 100644 --- a/src/seals/diagnostics/noisy_obs.py +++ b/src/seals/diagnostics/noisy_obs.py @@ -47,7 +47,7 @@ def initial_state(self) -> np.ndarray: """Returns one of the grid's corners.""" n = self._size corners = np.array([[0, 0], [n - 1, 0], [0, n - 1], [n - 1, n - 1]]) - return corners[np.rand_state.randint(4)] + return corners[self.rand_state.randint(4)] def reward(self, state: np.ndarray, action: int, new_state: np.ndarray) -> float: """Returns positive reward if state is the goal.""" From a08035494c744d2d7075fcc93d186b468b5d44bd Mon Sep 17 00:00:00 2001 From: Pedro Freire Date: Sat, 27 Jun 2020 11:38:44 -0700 Subject: [PATCH 6/8] Make NoisyObsEnv reward a float --- 
src/seals/diagnostics/noisy_obs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/seals/diagnostics/noisy_obs.py b/src/seals/diagnostics/noisy_obs.py index 2e5cbfd..9b38295 100644 --- a/src/seals/diagnostics/noisy_obs.py +++ b/src/seals/diagnostics/noisy_obs.py @@ -51,7 +51,7 @@ def initial_state(self) -> np.ndarray: def reward(self, state: np.ndarray, action: int, new_state: np.ndarray) -> float: """Returns positive reward if state is the goal.""" - return np.allclose(state, self._goal) + return float(np.allclose(state, self._goal)) def transition(self, state: np.ndarray, action: int) -> np.ndarray: """Returns next state according to grid.""" From 97140120f006948eac99413d3afcdbd40d5830ad Mon Sep 17 00:00:00 2001 From: pedrofreire Date: Tue, 30 Jun 2020 15:30:11 +0200 Subject: [PATCH 7/8] Apply suggestions from code review Co-authored-by: Adam Gleave --- src/seals/diagnostics/noisy_obs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/seals/diagnostics/noisy_obs.py b/src/seals/diagnostics/noisy_obs.py index 9b38295..8a88e54 100644 --- a/src/seals/diagnostics/noisy_obs.py +++ b/src/seals/diagnostics/noisy_obs.py @@ -35,7 +35,7 @@ def __init__(self, *, size: int = 5, noise_length: int = 20): high=np.concatenate( ([size - 1, size - 1], np.full(self._noise_length, np.inf)), ), - dtype=float, + dtype=np.float32, ), ) @@ -50,7 +50,7 @@ def initial_state(self) -> np.ndarray: return corners[self.rand_state.randint(4)] def reward(self, state: np.ndarray, action: int, new_state: np.ndarray) -> float: - """Returns positive reward if state is the goal.""" + """Returns +1.0 reward if state is the goal and 0.0 otherwise.""" return float(np.allclose(state, self._goal)) def transition(self, state: np.ndarray, action: int) -> np.ndarray: From 9c8cffefefc154852a94f0c7b8b8e747393dc170 Mon Sep 17 00:00:00 2001 From: Pedro Freire Date: Tue, 30 Jun 2020 06:39:18 -0700 Subject: [PATCH 8/8] np.allclose -> np.all, add top-left corner 
comment --- src/seals/diagnostics/noisy_obs.py | 2 +- src/seals/util.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/seals/diagnostics/noisy_obs.py b/src/seals/diagnostics/noisy_obs.py index 9b38295..dfcc1c4 100644 --- a/src/seals/diagnostics/noisy_obs.py +++ b/src/seals/diagnostics/noisy_obs.py @@ -51,7 +51,7 @@ def initial_state(self) -> np.ndarray: def reward(self, state: np.ndarray, action: int, new_state: np.ndarray) -> float: """Returns positive reward if state is the goal.""" - return float(np.allclose(state, self._goal)) + return float(np.all(state == self._goal)) def transition(self, state: np.ndarray, action: int) -> np.ndarray: """Returns next state according to grid.""" diff --git a/src/seals/util.py b/src/seals/util.py index 7c7663b..66a118a 100644 --- a/src/seals/util.py +++ b/src/seals/util.py @@ -131,6 +131,8 @@ def grid_transition_fn( Agent is bounded in the region limited by x_bounds and y_bounds, ends inclusive. + (0, 0) is interpreted to be the top-left corner. + Actions: 0: Right 1: Down