
Add ProcGoalEnv. #30

Merged · 6 commits · Jun 30, 2020

Changes from 4 commits

6 changes: 6 additions & 0 deletions src/seals/diagnostics/__init__.py
@@ -7,3 +7,9 @@
    entry_point="seals.diagnostics.risky_path:RiskyPathEnv",
    max_episode_steps=5,
)

gym.register(
    id="seals/ProcGoal-v0",
    entry_point="seals.diagnostics.proc_goal:ProcGoalEnv",
    max_episode_steps=20,
)
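
For context, a minimal usage sketch (not part of the diff) of the newly registered environment. It assumes the classic Gym reset/step API in use at the time of this PR; importing seals.diagnostics runs the gym.register calls above:

import gym
import seals.diagnostics  # noqa: F401  -- importing this module runs the gym.register calls

env = gym.make("seals/ProcGoal-v0")
obs = env.reset()  # 4-vector: (agent_x, agent_y, goal_x, goal_y)
obs, reward, done, info = env.step(env.action_space.sample())
print(obs, reward)  # reward is the negated Manhattan distance between agent and goal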
62 changes: 62 additions & 0 deletions src/seals/diagnostics/proc_goal.py
@@ -0,0 +1,62 @@
"""Large gridworld with random agent and goal position."""

from gym import spaces
import numpy as np

from seals import base_envs, util


class ProcGoalEnv(base_envs.ResettableMDP):
"""Large gridworld with random agent and goal position.

In this task, the agent starts at a random position in a large
grid, and must navigate to a goal randomly placed in a
neighborhood around the agent. The observation is a 4-dimensional
vector containing the (x,y) coordinates of the agent and the goal.
The reward at each timestep is the negative Manhattan distance
between the two positions. With a large enough grid, generalizing
is necessary to achieve good performance, since most initial
states will be unseen.
"""

    def __init__(self, bounds: int = 100, distance: int = 10):
        """Constructs environment.

        Args:
            bounds: the absolute values of the coordinates of the initial agent
                position are bounded by `bounds`. Increasing the value might make
                generalization harder.
            distance: initial distance between agent and goal.
        """
        self._bounds = bounds
        self._distance = distance

        super().__init__(
            state_space=spaces.Box(low=-np.inf, high=np.inf, shape=(4,)),
            action_space=spaces.Discrete(5),
        )

    def terminal(self, state: np.ndarray, n_actions_taken: int) -> bool:
        """Always returns False."""
        return False

    def initial_state(self) -> np.ndarray:
        """Samples random agent position and random goal."""
        pos = self.rand_state.randint(low=-self._bounds, high=self._bounds, size=(2,))

        # Split the distance between the two axes and pick a random sign per axis,
        # so the goal is exactly `self._distance` away from the agent in L1 norm.
        x_dist = self.rand_state.randint(self._distance)
        y_dist = self._distance - x_dist
        random_signs = 2 * self.rand_state.randint(2, size=2) - 1
        goal = pos + random_signs * (x_dist, y_dist)

        return np.concatenate([pos, goal]).astype(self.observation_space.dtype)

    def reward(self, state: np.ndarray, action: int, new_state: np.ndarray) -> float:
        """Negative L1 distance to goal."""
        return (-1) * np.sum(np.abs(state[2:] - state[:2]))

    def transition(self, state: np.ndarray, action: int) -> np.ndarray:
        """Returns next state according to grid."""
        pos, goal = state[:2], state[2:]
        next_pos = util.grid_transition_fn(pos, action)
        return np.concatenate([next_pos, goal])
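
To illustrate the observation layout and reward (a hypothetical check, not part of the PR): an agent at (3, -2) with the goal at (7, 4) gives the state vector [3, -2, 7, 4] and reward -(|7 - 3| + |4 - (-2)|) = -10.

import numpy as np

state = np.array([3.0, -2.0, 7.0, 4.0])  # hypothetical state: agent at (3, -2), goal at (7, 4)
reward = -np.sum(np.abs(state[2:] - state[:2]))  # same formula as ProcGoalEnv.reward
print(reward)  # -10.0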
38 changes: 37 additions & 1 deletion src/seals/util.py
@@ -1,6 +1,6 @@
"""Miscellaneous utilities."""

-from typing import Optional
+from typing import Optional, Tuple

import gym
import numpy as np
Expand Down Expand Up @@ -118,3 +118,39 @@ def sample_distribution(
def one_hot_encoding(pos: int, size: int) -> np.ndarray:
"""Returns a 1-D hot encoding of a given position and size."""
return np.eye(size)[pos]


Reviewer comment (Member) on `def grid_transition_fn(`:

I've commented on this before in #23 -- make sure you address those comments and merge the previous PR before this one.

def grid_transition_fn(
    state: np.ndarray,
    action: int,
    x_bounds: Tuple[float, float] = (-np.inf, np.inf),
    y_bounds: Tuple[float, float] = (-np.inf, np.inf),
):
"""Returns transition of a deterministic gridworld.

Agent is bounded in the region limited by x_bounds and y_bounds,
ends inclusive.

Actions:
0: Right
1: Down
2: Left
3: Up
4: Stay put
"""
    # (dx, dy) displacement for each action, in the order listed in the docstring.
    dirs = [
        (1, 0),
        (0, 1),
        (-1, 0),
        (0, -1),
        (0, 0),
    ]

    x, y = state
    dx, dy = dirs[action]

    next_x = np.clip(x + dx, *x_bounds)
    next_y = np.clip(y + dy, *y_bounds)
    next_state = np.array([next_x, next_y], dtype=state.dtype)

    return next_state
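
A quick, hypothetical check of the clipping behaviour (not part of the diff), assuming the PR's util.grid_transition_fn: an agent at the edge of the bounded region stays put when it tries to move outside it.

import numpy as np
from seals import util

state = np.array([2.0, 0.0])
# Action 0 (Right) would move to x = 3, but x_bounds clips it back to 2.
next_state = util.grid_transition_fn(state, action=0, x_bounds=(-2, 2), y_bounds=(-2, 2))
print(next_state)  # [2. 0.]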