From 749d41f2d928c0ab42546a8062b172da19bd7187 Mon Sep 17 00:00:00 2001 From: Pedro Freire Date: Sat, 27 Jun 2020 10:48:18 -0700 Subject: [PATCH 1/5] Add ProcGoal environment. --- src/seals/diagnostics/__init__.py | 6 +++ src/seals/diagnostics/proc_goal.py | 62 ++++++++++++++++++++++++++++++ src/seals/util.py | 38 +++++++++++++++++- 3 files changed, 105 insertions(+), 1 deletion(-) create mode 100644 src/seals/diagnostics/proc_goal.py diff --git a/src/seals/diagnostics/__init__.py b/src/seals/diagnostics/__init__.py index e894a53..8ba1fc5 100644 --- a/src/seals/diagnostics/__init__.py +++ b/src/seals/diagnostics/__init__.py @@ -7,3 +7,9 @@ entry_point="seals.diagnostics.risky_path:RiskyPathEnv", max_episode_steps=5, ) + +gym.register( + id="seals/ProcGoal-v0", + entry_point="seals.diagnostics.proc_goal:ProcGoalEnv", + max_episode_steps=20, +) diff --git a/src/seals/diagnostics/proc_goal.py b/src/seals/diagnostics/proc_goal.py new file mode 100644 index 0000000..f3bc62f --- /dev/null +++ b/src/seals/diagnostics/proc_goal.py @@ -0,0 +1,62 @@ +"""Gridworld with large random initial state.""" + +from gym import spaces +import numpy as np + +from seals import base_envs, util + + +class ProcGoalEnv(base_envs.ResettableMDP): + """Gridworld with large random initial state. + + In this task, the agent starts at a random position in a large + grid, and must navigate to a goal randomly placed in a + neighborhood around the agent. The observation is a 4-dimensional + vector containing the (x,y) coordinates of the agent and the goal. + The reward at each timestep is the negative Manhattan distance + between the two positions. With a large enough grid, generalizing + is necessary to achieve good performance, since most initial + states will be unseen. + """ + + def __init__(self, bounds: int = 100, distance: int = 10): + """Constructs environment. + + Args: + bounds: the absolute values of the agent's coordinates are + bounded by `bounds`. Increasing the value might make + generalization harder. + distance: initial distance between agent and goal. 
+ """ + self._bounds = bounds + self._distance = distance + + super().__init__( + state_space=spaces.Box(low=-np.inf, high=np.inf, shape=(4,)), + action_space=spaces.Discrete(5), + ) + + def terminal(self, state: np.ndarray, n_actions_taken: int) -> bool: + """Always returns False.""" + return False + + def initial_state(self) -> np.ndarray: + """Samples random agent position and random goal.""" + pos = self.rand_state.randint(low=-self._bounds, high=self._bounds, size=(2,)) + + x_dist = self.rand_state.randint(self._distance) + y_dist = self._distance - x_dist + random_signs = 2 * self.rand_state.randint(2, size=2) - 1 + goal = pos + random_signs * (x_dist, y_dist) + + return np.concatenate([pos, goal]).astype(self.observation_space.dtype) + + def reward(self, state: np.ndarray, action: int, new_state: np.ndarray) -> float: + """Negative L1 distance to goal.""" + return (-1) * np.sum(np.abs(state[2:] - state[:2])) + + def transition(self, state: np.ndarray, action: int) -> np.ndarray: + """Returns next state according to grid.""" + pos, goal = state[:2], state[2:] + next_pos = util.grid_transition_fn(pos, action) + return np.concatenate([next_pos, goal]) diff --git a/src/seals/util.py b/src/seals/util.py index 19f364f..7c7663b 100644 --- a/src/seals/util.py +++ b/src/seals/util.py @@ -1,6 +1,6 @@ """Miscellaneous utilities.""" -from typing import Optional +from typing import Optional, Tuple import gym import numpy as np @@ -118,3 +118,39 @@ def sample_distribution( def one_hot_encoding(pos: int, size: int) -> np.ndarray: """Returns a 1-D hot encoding of a given position and size.""" return np.eye(size)[pos] + + +def grid_transition_fn( + state: np.ndarray, + action: int, + x_bounds: Tuple[float, float] = (-np.inf, np.inf), + y_bounds: Tuple[float, float] = (-np.inf, np.inf), +): + """Returns transition of a deterministic gridworld. + + Agent is bounded in the region limited by x_bounds and y_bounds, + ends inclusive. + + Actions: + 0: Right + 1: Down + 2: Left + 3: Up + 4: Stay put + """ + dirs = [ + (1, 0), + (0, 1), + (-1, 0), + (0, -1), + (0, 0), + ] + + x, y = state + dx, dy = dirs[action] + + next_x = np.clip(x + dx, *x_bounds) + next_y = np.clip(y + dy, *y_bounds) + next_state = np.array([next_x, next_y], dtype=state.dtype) + + return next_state From 4b5b18ca4b13426a03c4450ef84c2d7fe2b25483 Mon Sep 17 00:00:00 2001 From: pedrofreire Date: Tue, 30 Jun 2020 15:31:50 +0200 Subject: [PATCH 2/5] Apply suggestions from code review Co-authored-by: Adam Gleave --- src/seals/diagnostics/proc_goal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/seals/diagnostics/proc_goal.py b/src/seals/diagnostics/proc_goal.py index f3bc62f..532abae 100644 --- a/src/seals/diagnostics/proc_goal.py +++ b/src/seals/diagnostics/proc_goal.py @@ -1,4 +1,4 @@ -"""Gridworld with large random initial state.""" +"""Large gridworld with random agent and goal position.""" from gym import spaces import numpy as np @@ -7,7 +7,7 @@ class ProcGoalEnv(base_envs.ResettableMDP): - """Gridworld with large random initial state. +"""Large gridworld with random agent and goal position. 
In this task, the agent starts at a random position in a large grid, and must navigate to a goal randomly placed in a From 782d8ebf8d30bb8f4a27a89b57025e842ff6222a Mon Sep 17 00:00:00 2001 From: Pedro Freire Date: Tue, 30 Jun 2020 07:17:10 -0700 Subject: [PATCH 3/5] Clarify bounds parameter docstring --- src/seals/diagnostics/proc_goal.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/seals/diagnostics/proc_goal.py b/src/seals/diagnostics/proc_goal.py index 532abae..5347f07 100644 --- a/src/seals/diagnostics/proc_goal.py +++ b/src/seals/diagnostics/proc_goal.py @@ -23,9 +23,9 @@ def __init__(self, bounds: int = 100, distance: int = 10): """Constructs environment. Args: - bounds: the absolute values of the agent's coordinates are - bounded by `bounds`. Increasing the value might make - generalization harder. + bounds: the absolute values of the coordinates of the initial agent + position are bounded by `bounds`. Increasing the value might make + generalization harder. distance: initial distance between agent and goal. """ self._bounds = bounds From 02d62b6cd8611a337c4314a4bbfc6156ea0db951 Mon Sep 17 00:00:00 2001 From: Pedro Freire Date: Tue, 30 Jun 2020 07:25:18 -0700 Subject: [PATCH 4/5] Fix docstring identation --- src/seals/diagnostics/proc_goal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/seals/diagnostics/proc_goal.py b/src/seals/diagnostics/proc_goal.py index 5347f07..e05412e 100644 --- a/src/seals/diagnostics/proc_goal.py +++ b/src/seals/diagnostics/proc_goal.py @@ -7,7 +7,7 @@ class ProcGoalEnv(base_envs.ResettableMDP): -"""Large gridworld with random agent and goal position. + """Large gridworld with random agent and goal position. In this task, the agent starts at a random position in a large grid, and must navigate to a goal randomly placed in a From 049fde8381e9e20460cda49b0010614e0b873b38 Mon Sep 17 00:00:00 2001 From: pedrofreire Date: Tue, 30 Jun 2020 23:11:43 +0200 Subject: [PATCH 5/5] Apply suggestions from code review Co-authored-by: Adam Gleave --- src/seals/diagnostics/proc_goal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/seals/diagnostics/proc_goal.py b/src/seals/diagnostics/proc_goal.py index e05412e..e534b72 100644 --- a/src/seals/diagnostics/proc_goal.py +++ b/src/seals/diagnostics/proc_goal.py @@ -24,8 +24,8 @@ def __init__(self, bounds: int = 100, distance: int = 10): Args: bounds: the absolute values of the coordinates of the initial agent - position are bounded by `bounds`. Increasing the value might make - generalization harder. + position are bounded by `bounds`. Increasing the value might make + generalization harder. distance: initial distance between agent and goal. """ self._bounds = bounds
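
Below is a minimal usage sketch, not part of the patches above: it assumes that importing seals.diagnostics executes the gym.register call added in PATCH 1/5, and that base_envs.ResettableMDP exposes the standard Gym reset/step interface.

import gym

import seals.diagnostics  # noqa: F401  (registers "seals/ProcGoal-v0")

env = gym.make("seals/ProcGoal-v0")  # wrapped in TimeLimit(max_episode_steps=20)
obs = env.reset()  # 4-vector: (agent_x, agent_y, goal_x, goal_y)

done = False
total_reward = 0.0
while not done:
    # Per grid_transition_fn: 0 = right, 1 = down, 2 = left, 3 = up, 4 = stay put.
    action = env.action_space.sample()
    # reward is the negative Manhattan (L1) distance between agent and goal;
    # terminal() always returns False, so done only fires via the 20-step TimeLimit.
    obs, reward, done, info = env.step(action)
    total_reward += reward

print("episode return:", total_reward)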