From 749d41f2d928c0ab42546a8062b172da19bd7187 Mon Sep 17 00:00:00 2001 From: Pedro Freire Date: Sat, 27 Jun 2020 10:48:18 -0700 Subject: [PATCH 1/5] Add ProcGoal environment. --- src/seals/diagnostics/__init__.py | 6 +++ src/seals/diagnostics/proc_goal.py | 62 ++++++++++++++++++++++++++++++ src/seals/util.py | 38 +++++++++++++++++- 3 files changed, 105 insertions(+), 1 deletion(-) create mode 100644 src/seals/diagnostics/proc_goal.py diff --git a/src/seals/diagnostics/__init__.py b/src/seals/diagnostics/__init__.py index e894a53..8ba1fc5 100644 --- a/src/seals/diagnostics/__init__.py +++ b/src/seals/diagnostics/__init__.py @@ -7,3 +7,9 @@ entry_point="seals.diagnostics.risky_path:RiskyPathEnv", max_episode_steps=5, ) + +gym.register( + id="seals/ProcGoal-v0", + entry_point="seals.diagnostics.proc_goal:ProcGoalEnv", + max_episode_steps=20, +) diff --git a/src/seals/diagnostics/proc_goal.py b/src/seals/diagnostics/proc_goal.py new file mode 100644 index 0000000..f3bc62f --- /dev/null +++ b/src/seals/diagnostics/proc_goal.py @@ -0,0 +1,62 @@ +"""Gridworld with large random initial state.""" + +from gym import spaces +import numpy as np + +from seals import base_envs, util + + +class ProcGoalEnv(base_envs.ResettableMDP): + """Gridworld with large random initial state. + + In this task, the agent starts at a random position in a large + grid, and must navigate to a goal randomly placed in a + neighborhood around the agent. The observation is a 4-dimensional + vector containing the (x,y) coordinates of the agent and the goal. + The reward at each timestep is the negative Manhattan distance + between the two positions. With a large enough grid, generalizing + is necessary to achieve good performance, since most initial + states will be unseen. + """ + + def __init__(self, bounds: int = 100, distance: int = 10): + """Constructs environment. + + Args: + bounds: the absolute values of the agent's coordinates are + bounded by `bounds`. Increasing the value might make + generalization harder. + distance: initial distance between agent and goal. 
+ """ + self._bounds = bounds + self._distance = distance + + super().__init__( + state_space=spaces.Box(low=-np.inf, high=np.inf, shape=(4,)), + action_space=spaces.Discrete(5), + ) + + def terminal(self, state: np.ndarray, n_actions_taken: int) -> bool: + """Always returns False.""" + return False + + def initial_state(self) -> np.ndarray: + """Samples random agent position and random goal.""" + pos = self.rand_state.randint(low=-self._bounds, high=self._bounds, size=(2,)) + + x_dist = self.rand_state.randint(self._distance) + y_dist = self._distance - x_dist + random_signs = 2 * self.rand_state.randint(2, size=2) - 1 + goal = pos + random_signs * (x_dist, y_dist) + + return np.concatenate([pos, goal]).astype(self.observation_space.dtype) + + def reward(self, state: np.ndarray, action: int, new_state: np.ndarray) -> float: + """Negative L1 distance to goal.""" + return (-1) * np.sum(np.abs(state[2:] - state[:2])) + + def transition(self, state: np.ndarray, action: int) -> np.ndarray: + """Returns next state according to grid.""" + pos, goal = state[:2], state[2:] + next_pos = util.grid_transition_fn(pos, action) + return np.concatenate([next_pos, goal]) diff --git a/src/seals/util.py b/src/seals/util.py index 19f364f..7c7663b 100644 --- a/src/seals/util.py +++ b/src/seals/util.py @@ -1,6 +1,6 @@ """Miscellaneous utilities.""" -from typing import Optional +from typing import Optional, Tuple import gym import numpy as np @@ -118,3 +118,39 @@ def sample_distribution( def one_hot_encoding(pos: int, size: int) -> np.ndarray: """Returns a 1-D hot encoding of a given position and size.""" return np.eye(size)[pos] + + +def grid_transition_fn( + state: np.ndarray, + action: int, + x_bounds: Tuple[float, float] = (-np.inf, np.inf), + y_bounds: Tuple[float, float] = (-np.inf, np.inf), +): + """Returns transition of a deterministic gridworld. + + Agent is bounded in the region limited by x_bounds and y_bounds, + ends inclusive. + + Actions: + 0: Right + 1: Down + 2: Left + 3: Up + 4: Stay put + """ + dirs = [ + (1, 0), + (0, 1), + (-1, 0), + (0, -1), + (0, 0), + ] + + x, y = state + dx, dy = dirs[action] + + next_x = np.clip(x + dx, *x_bounds) + next_y = np.clip(y + dy, *y_bounds) + next_state = np.array([next_x, next_y], dtype=state.dtype) + + return next_state From 4b5b18ca4b13426a03c4450ef84c2d7fe2b25483 Mon Sep 17 00:00:00 2001 From: pedrofreire Date: Tue, 30 Jun 2020 15:31:50 +0200 Subject: [PATCH 2/5] Apply suggestions from code review Co-authored-by: Adam Gleave --- src/seals/diagnostics/proc_goal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/seals/diagnostics/proc_goal.py b/src/seals/diagnostics/proc_goal.py index f3bc62f..532abae 100644 --- a/src/seals/diagnostics/proc_goal.py +++ b/src/seals/diagnostics/proc_goal.py @@ -1,4 +1,4 @@ -"""Gridworld with large random initial state.""" +"""Large gridworld with random agent and goal position.""" from gym import spaces import numpy as np @@ -7,7 +7,7 @@ class ProcGoalEnv(base_envs.ResettableMDP): - """Gridworld with large random initial state. +"""Large gridworld with random agent and goal position. 
In this task, the agent starts at a random position in a large grid, and must navigate to a goal randomly placed in a From 782d8ebf8d30bb8f4a27a89b57025e842ff6222a Mon Sep 17 00:00:00 2001 From: Pedro Freire Date: Tue, 30 Jun 2020 07:17:10 -0700 Subject: [PATCH 3/5] Clarify bounds parameter docstring --- src/seals/diagnostics/proc_goal.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/seals/diagnostics/proc_goal.py b/src/seals/diagnostics/proc_goal.py index 532abae..5347f07 100644 --- a/src/seals/diagnostics/proc_goal.py +++ b/src/seals/diagnostics/proc_goal.py @@ -23,9 +23,9 @@ def __init__(self, bounds: int = 100, distance: int = 10): """Constructs environment. Args: - bounds: the absolute values of the agent's coordinates are - bounded by `bounds`. Increasing the value might make - generalization harder. + bounds: the absolute values of the coordinates of the initial agent + position are bounded by `bounds`. Increasing the value might make + generalization harder. distance: initial distance between agent and goal. """ self._bounds = bounds From 02d62b6cd8611a337c4314a4bbfc6156ea0db951 Mon Sep 17 00:00:00 2001 From: Pedro Freire Date: Tue, 30 Jun 2020 07:25:18 -0700 Subject: [PATCH 4/5] Fix docstring identation --- src/seals/diagnostics/proc_goal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/seals/diagnostics/proc_goal.py b/src/seals/diagnostics/proc_goal.py index 5347f07..e05412e 100644 --- a/src/seals/diagnostics/proc_goal.py +++ b/src/seals/diagnostics/proc_goal.py @@ -7,7 +7,7 @@ class ProcGoalEnv(base_envs.ResettableMDP): -"""Large gridworld with random agent and goal position. + """Large gridworld with random agent and goal position. In this task, the agent starts at a random position in a large grid, and must navigate to a goal randomly placed in a From 049fde8381e9e20460cda49b0010614e0b873b38 Mon Sep 17 00:00:00 2001 From: pedrofreire Date: Tue, 30 Jun 2020 23:11:43 +0200 Subject: [PATCH 5/5] Apply suggestions from code review Co-authored-by: Adam Gleave --- src/seals/diagnostics/proc_goal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/seals/diagnostics/proc_goal.py b/src/seals/diagnostics/proc_goal.py index e05412e..e534b72 100644 --- a/src/seals/diagnostics/proc_goal.py +++ b/src/seals/diagnostics/proc_goal.py @@ -24,8 +24,8 @@ def __init__(self, bounds: int = 100, distance: int = 10): Args: bounds: the absolute values of the coordinates of the initial agent - position are bounded by `bounds`. Increasing the value might make - generalization harder. + position are bounded by `bounds`. Increasing the value might make + generalization harder. distance: initial distance between agent and goal. """ self._bounds = bounds
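
Below is a minimal usage sketch, not part of the patches above: it assumes that importing seals.diagnostics executes the gym.register call added in PATCH 1/5, and that base_envs.ResettableMDP exposes the standard Gym reset/step interface.

import gym

import seals.diagnostics  # noqa: F401  (registers "seals/ProcGoal-v0")

env = gym.make("seals/ProcGoal-v0")  # wrapped in TimeLimit(max_episode_steps=20)
obs = env.reset()  # 4-vector: (agent_x, agent_y, goal_x, goal_y)

done = False
total_reward = 0.0
while not done:
    # Per grid_transition_fn: 0 = right, 1 = down, 2 = left, 3 = up, 4 = stay put.
    action = env.action_space.sample()
    # reward is the negative Manhattan (L1) distance between agent and goal;
    # terminal() always returns False, so done only fires via the 20-step TimeLimit.
    obs, reward, done, info = env.step(action)
    total_reward += reward

print("episode return:", total_reward)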