From b4597a734cfe77576cd558373242a45d57165735 Mon Sep 17 00:00:00 2001
From: Pedro Freire <pedrofreirex@gmail.com>
Date: Fri, 26 Jun 2020 17:33:30 -0700
Subject: [PATCH 1/8] Start adding NoisyObs

---
 src/seals/diagnostics/__init__.py  |  6 +++
 src/seals/diagnostics/noisy_obs.py | 64 ++++++++++++++++++++++++++++++
 src/seals/util.py                  | 35 +++++++++++++++-
 3 files changed, 104 insertions(+), 1 deletion(-)
 create mode 100644 src/seals/diagnostics/noisy_obs.py

diff --git a/src/seals/diagnostics/__init__.py b/src/seals/diagnostics/__init__.py
index e894a53..e61b8c6 100644
--- a/src/seals/diagnostics/__init__.py
+++ b/src/seals/diagnostics/__init__.py
@@ -7,3 +7,9 @@
     entry_point="seals.diagnostics.risky_path:RiskyPathEnv",
     max_episode_steps=5,
 )
+
+gym.register(
+    id="seals/NoisyObs-v0",
+    entry_point="seals.diagnostics.noisy_obs:NoisyObsEnv",
+    max_episode_steps=15,
+)
diff --git a/src/seals/diagnostics/noisy_obs.py b/src/seals/diagnostics/noisy_obs.py
new file mode 100644
index 0000000..f4e50b0
--- /dev/null
+++ b/src/seals/diagnostics/noisy_obs.py
@@ -0,0 +1,64 @@
+"""Environment testing for robustness to noise."""
+
+from gym import spaces
+import numpy as np
+
+from seals import base_envs, util
+
+
+class NoisyObsEnv(base_envs.ResettablePOMDP):
+    """Simple gridworld with noisy observations.
+
+    The agent randomly starts at one of the corners of an MxM grid and
+    tries to reach and stay at the center. The observation consists of the
+    agent's (x,y) coordinates and L "distractor" samples of Gaussian noise.
+    The challenge is to select the relevant features in the observations, and
+    not overfit to noise.
+    """
+    def __init__(self, *, size:int = 5, noise_length:int = 20):
+        """Build environment.
+
+        Args:
+            size: width and height of gridworld.
+            noise_length: dimension of noise vector in observation.
+        """
+        self._size = size
+        self._noise_length = noise_length
+        self._goal = np.array([self._size // 2, self._size // 2])
+
+        self._observation_space = spaces.Box(
+            low=np.concatenate(([0, 0], np.full(self._noise_length, -np.inf),)),
+            high=np.concatenate(
+                ([size - 1, size - 1], np.full(self._noise_length, np.inf),)
+            ),
+            dtype=float,
+        )
+
+        super().__init__(
+            state_space=spaces.MultiDiscrete([size, size]),
+            action_space=spaces.Discrete(5),
+        )
+
+    def terminal(self, state: int) -> bool:
+        return False
+
+    def initial_state(self) -> int:
+        n = self._size
+        corners = np.array([[0, 0], [n - 1, 0], [0, n - 1], [n - 1, n - 1]])
+        return corners[np.random.randint(4)]
+
+    def reward(self, state: int, action: int, new_state: int) -> float:
+        return np.allclose(state, self.goal)
+
+    def transition(self, state: int, action: int) -> int:
+        return util.grid_transition_fn(
+            state, action, x_bounds=(0, self._size - 1), y_bounds=(0, self._size - 1)
+        )
+
+    @property
+    def observation_space(self):
+        return self._observation_space
+
+    def ob_from_state(self, state):
+        noise_vector = self.np_random.randn(self._noise_length)
+        return np.concatenate([state, noise_vector])
diff --git a/src/seals/util.py b/src/seals/util.py
index 19f364f..89eaade 100644
--- a/src/seals/util.py
+++ b/src/seals/util.py
@@ -1,6 +1,6 @@
 """Miscellaneous utilities."""
 
-from typing import Optional
+from typing import Optional, Tuple
 
 import gym
 import numpy as np
@@ -118,3 +118,36 @@ def sample_distribution(
 def one_hot_encoding(pos: int, size: int) -> np.ndarray:
     """Returns a 1-D hot encoding of a given position and size."""
     return np.eye(size)[pos]
+
+
+def grid_transition_fn(
+        state: int, action: int, x_bounds: Tuple[float, float] =(-np.inf, np.inf), y_bounds: Tuple[float, float] = (-np.inf, np.inf),
+):
+    """Returns transition of a deterministic gridworld.
+
+    Agent is bounded in the region limited by x_bounds and y_bounds,
+    ends inclusive.
+
+    Actions:
+    0: Right
+    1: Down
+    2: Left
+    3: Up
+    4: Stay put
+    """
+    dirs = [
+        (1, 0),
+        (0, 1),
+        (-1, 0),
+        (0, -1),
+        (0, 0),
+    ]
+
+    x, y = state
+    dx, dy = dirs[action]
+
+    next_x = np.clip(x + dx, *x_bounds)
+    next_y = np.clip(y + dy, *y_bounds)
+    next_state = np.array([next_x, next_y], dtype=state.dtype)
+
+    return next_state

From 75a0ea44a53dc5e97c2f9be2b37b90f6506c56d6 Mon Sep 17 00:00:00 2001
From: Pedro Freire <pedrofreirex@gmail.com>
Date: Sat, 27 Jun 2020 08:45:39 -0700
Subject: [PATCH 2/8] Fix NoisyObs details.

---
 src/seals/diagnostics/noisy_obs.py | 41 +++++++++++++++---------------
 src/seals/util.py                  |  5 +++-
 2 files changed, 25 insertions(+), 21 deletions(-)

diff --git a/src/seals/diagnostics/noisy_obs.py b/src/seals/diagnostics/noisy_obs.py
index f4e50b0..4b3e973 100644
--- a/src/seals/diagnostics/noisy_obs.py
+++ b/src/seals/diagnostics/noisy_obs.py
@@ -15,7 +15,8 @@ class NoisyObsEnv(base_envs.ResettablePOMDP):
     The challenge is to select the relevant features in the observations, and
     not overfit to noise.
     """
-    def __init__(self, *, size:int = 5, noise_length:int = 20):
+
+    def __init__(self, *, size: int = 5, noise_length: int = 20):
         """Build environment.
 
         Args:
@@ -26,39 +27,39 @@ def __init__(self, *, size:int = 5, noise_length:int = 20):
         self._noise_length = noise_length
         self._goal = np.array([self._size // 2, self._size // 2])
 
-        self._observation_space = spaces.Box(
-            low=np.concatenate(([0, 0], np.full(self._noise_length, -np.inf),)),
-            high=np.concatenate(
-                ([size - 1, size - 1], np.full(self._noise_length, np.inf),)
-            ),
-            dtype=float,
-        )
-
         super().__init__(
             state_space=spaces.MultiDiscrete([size, size]),
             action_space=spaces.Discrete(5),
+            observation_space=spaces.Box(
+                low=np.concatenate(([0, 0], np.full(self._noise_length, -np.inf),)),
+                high=np.concatenate(
+                    ([size - 1, size - 1], np.full(self._noise_length, np.inf),)
+                ),
+                dtype=float,
+            ),
         )
 
-    def terminal(self, state: int) -> bool:
+    def terminal(self, state: np.ndarray, n_actions_taken: int) -> bool:
+        """Always returns False."""
         return False
 
-    def initial_state(self) -> int:
+    def initial_state(self) -> np.ndarray:
+        """Returns one of the grid's corners."""
         n = self._size
         corners = np.array([[0, 0], [n - 1, 0], [0, n - 1], [n - 1, n - 1]])
         return corners[np.random.randint(4)]
 
-    def reward(self, state: int, action: int, new_state: int) -> float:
-        return np.allclose(state, self.goal)
+    def reward(self, state: np.ndarray, action: int, new_state: np.ndarray) -> float:
+        """Returns positive reward if state is the goal."""
+        return np.allclose(state, self._goal)
 
-    def transition(self, state: int, action: int) -> int:
+    def transition(self, state: np.ndarray, action: int) -> np.ndarray:
+        """Returns next state according to grid."""
         return util.grid_transition_fn(
             state, action, x_bounds=(0, self._size - 1), y_bounds=(0, self._size - 1)
         )
 
-    @property
-    def observation_space(self):
-        return self._observation_space
-
-    def ob_from_state(self, state):
-        noise_vector = self.np_random.randn(self._noise_length)
+    def obs_from_state(self, state: np.ndarray) -> np.ndarray:
+        """Returns (x, y) concatenated with Gaussian noise."""
+        noise_vector = self.rand_state.randn(self._noise_length)
         return np.concatenate([state, noise_vector])
diff --git a/src/seals/util.py b/src/seals/util.py
index 89eaade..7c7663b 100644
--- a/src/seals/util.py
+++ b/src/seals/util.py
@@ -121,7 +121,10 @@ def one_hot_encoding(pos: int, size: int) -> np.ndarray:
 
 
 def grid_transition_fn(
-        state: int, action: int, x_bounds: Tuple[float, float] =(-np.inf, np.inf), y_bounds: Tuple[float, float] = (-np.inf, np.inf),
+    state: np.ndarray,
+    action: int,
+    x_bounds: Tuple[float, float] = (-np.inf, np.inf),
+    y_bounds: Tuple[float, float] = (-np.inf, np.inf),
 ):
     """Returns transition of a deterministic gridworld.
 

From 8bfb7e63bc45fe0e43bd2365ccbda1029e397b60 Mon Sep 17 00:00:00 2001
From: Pedro Freire <pedrofreirex@gmail.com>
Date: Sat, 27 Jun 2020 11:03:22 -0700
Subject: [PATCH 3/8] Fix commas.

---
 src/seals/diagnostics/noisy_obs.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/seals/diagnostics/noisy_obs.py b/src/seals/diagnostics/noisy_obs.py
index 4b3e973..fde210c 100644
--- a/src/seals/diagnostics/noisy_obs.py
+++ b/src/seals/diagnostics/noisy_obs.py
@@ -31,9 +31,9 @@ def __init__(self, *, size: int = 5, noise_length: int = 20):
             state_space=spaces.MultiDiscrete([size, size]),
             action_space=spaces.Discrete(5),
             observation_space=spaces.Box(
-                low=np.concatenate(([0, 0], np.full(self._noise_length, -np.inf),)),
+                low=np.concatenate(([0, 0], np.full(self._noise_length, -np.inf))),
                 high=np.concatenate(
-                    ([size - 1, size - 1], np.full(self._noise_length, np.inf),)
+                    ([size - 1, size - 1], np.full(self._noise_length, np.inf)),
                 ),
                 dtype=float,
             ),
@@ -56,7 +56,7 @@ def reward(self, state: np.ndarray, action: int, new_state: np.ndarray) -> float
     def transition(self, state: np.ndarray, action: int) -> np.ndarray:
         """Returns next state according to grid."""
         return util.grid_transition_fn(
-            state, action, x_bounds=(0, self._size - 1), y_bounds=(0, self._size - 1)
+            state, action, x_bounds=(0, self._size - 1), y_bounds=(0, self._size - 1),
         )
 
     def obs_from_state(self, state: np.ndarray) -> np.ndarray:

From 356c7f1b0ed8e0181809ef8cf76e079ad0db2a21 Mon Sep 17 00:00:00 2001
From: Pedro Freire <pedrofreirex@gmail.com>
Date: Sat, 27 Jun 2020 11:10:12 -0700
Subject: [PATCH 4/8] Fix NoisyObsEnv randomness

---
 src/seals/diagnostics/noisy_obs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/seals/diagnostics/noisy_obs.py b/src/seals/diagnostics/noisy_obs.py
index fde210c..fa277a2 100644
--- a/src/seals/diagnostics/noisy_obs.py
+++ b/src/seals/diagnostics/noisy_obs.py
@@ -47,7 +47,7 @@ def initial_state(self) -> np.ndarray:
         """Returns one of the grid's corners."""
         n = self._size
         corners = np.array([[0, 0], [n - 1, 0], [0, n - 1], [n - 1, n - 1]])
-        return corners[np.random.randint(4)]
+        return corners[np.rand_state.randint(4)]
 
     def reward(self, state: np.ndarray, action: int, new_state: np.ndarray) -> float:
         """Returns positive reward if state is the goal."""

From ce6a7e18db7080030edc0e641ea4506223059024 Mon Sep 17 00:00:00 2001
From: Pedro Freire <pedrofreirex@gmail.com>
Date: Sat, 27 Jun 2020 11:34:30 -0700
Subject: [PATCH 5/8] Fix NoisyObsEnv random state

---
 src/seals/diagnostics/noisy_obs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/seals/diagnostics/noisy_obs.py b/src/seals/diagnostics/noisy_obs.py
index fa277a2..2e5cbfd 100644
--- a/src/seals/diagnostics/noisy_obs.py
+++ b/src/seals/diagnostics/noisy_obs.py
@@ -47,7 +47,7 @@ def initial_state(self) -> np.ndarray:
         """Returns one of the grid's corners."""
         n = self._size
         corners = np.array([[0, 0], [n - 1, 0], [0, n - 1], [n - 1, n - 1]])
-        return corners[np.rand_state.randint(4)]
+        return corners[self.rand_state.randint(4)]
 
     def reward(self, state: np.ndarray, action: int, new_state: np.ndarray) -> float:
         """Returns positive reward if state is the goal."""

From a08035494c744d2d7075fcc93d186b468b5d44bd Mon Sep 17 00:00:00 2001
From: Pedro Freire <pedrofreirex@gmail.com>
Date: Sat, 27 Jun 2020 11:38:44 -0700
Subject: [PATCH 6/8] Make NoisyObsEnv reward a float

---
 src/seals/diagnostics/noisy_obs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/seals/diagnostics/noisy_obs.py b/src/seals/diagnostics/noisy_obs.py
index 2e5cbfd..9b38295 100644
--- a/src/seals/diagnostics/noisy_obs.py
+++ b/src/seals/diagnostics/noisy_obs.py
@@ -51,7 +51,7 @@ def initial_state(self) -> np.ndarray:
 
     def reward(self, state: np.ndarray, action: int, new_state: np.ndarray) -> float:
         """Returns positive reward if state is the goal."""
-        return np.allclose(state, self._goal)
+        return float(np.allclose(state, self._goal))
 
     def transition(self, state: np.ndarray, action: int) -> np.ndarray:
         """Returns next state according to grid."""

From 97140120f006948eac99413d3afcdbd40d5830ad Mon Sep 17 00:00:00 2001
From: pedrofreire <pedrofreirex@gmail.com>
Date: Tue, 30 Jun 2020 15:30:11 +0200
Subject: [PATCH 7/8] Apply suggestions from code review

Co-authored-by: Adam Gleave <adam@gleave.me>
---
 src/seals/diagnostics/noisy_obs.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/seals/diagnostics/noisy_obs.py b/src/seals/diagnostics/noisy_obs.py
index 9b38295..8a88e54 100644
--- a/src/seals/diagnostics/noisy_obs.py
+++ b/src/seals/diagnostics/noisy_obs.py
@@ -35,7 +35,7 @@ def __init__(self, *, size: int = 5, noise_length: int = 20):
                 high=np.concatenate(
                     ([size - 1, size - 1], np.full(self._noise_length, np.inf)),
                 ),
-                dtype=float,
+                dtype=np.float32,
             ),
         )
 
@@ -50,7 +50,7 @@ def initial_state(self) -> np.ndarray:
         return corners[self.rand_state.randint(4)]
 
     def reward(self, state: np.ndarray, action: int, new_state: np.ndarray) -> float:
-        """Returns positive reward if state is the goal."""
+        """Returns +1.0 reward if state is the goal and 0.0 otherwise."""
         return float(np.allclose(state, self._goal))
 
     def transition(self, state: np.ndarray, action: int) -> np.ndarray:

From 9c8cffefefc154852a94f0c7b8b8e747393dc170 Mon Sep 17 00:00:00 2001
From: Pedro Freire <pedrofreirex@gmail.com>
Date: Tue, 30 Jun 2020 06:39:18 -0700
Subject: [PATCH 8/8] np.allclose -> np.all, add top-left corner comment

---
 src/seals/diagnostics/noisy_obs.py | 2 +-
 src/seals/util.py                  | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/seals/diagnostics/noisy_obs.py b/src/seals/diagnostics/noisy_obs.py
index 9b38295..dfcc1c4 100644
--- a/src/seals/diagnostics/noisy_obs.py
+++ b/src/seals/diagnostics/noisy_obs.py
@@ -51,7 +51,7 @@ def initial_state(self) -> np.ndarray:
 
     def reward(self, state: np.ndarray, action: int, new_state: np.ndarray) -> float:
         """Returns positive reward if state is the goal."""
-        return float(np.allclose(state, self._goal))
+        return float(np.all(state == self._goal))
 
     def transition(self, state: np.ndarray, action: int) -> np.ndarray:
         """Returns next state according to grid."""
diff --git a/src/seals/util.py b/src/seals/util.py
index 7c7663b..66a118a 100644
--- a/src/seals/util.py
+++ b/src/seals/util.py
@@ -131,6 +131,8 @@ def grid_transition_fn(
     Agent is bounded in the region limited by x_bounds and y_bounds,
     ends inclusive.
 
+    (0, 0) is interpreted to be the top-left corner.
+
     Actions:
     0: Right
     1: Down