HumanCompatibleAI · AdamGleave · Nov 22, 2022 · Oct 7, 2022 · Oct 10, 2022 · Oct 17, 2022
diff --git a/src/seals/atari.py b/src/seals/atari.py
@@ -1,15 +1,48 @@
 """Adaptation of Atari environments for specification learning algorithms."""
 
-from typing import Iterable
+from typing import Dict, Iterable, List, Optional, Tuple
 
 import gym
 
-from seals.util import AutoResetWrapper, get_gym_max_episode_steps
+from seals.util import AutoResetWrapper, MaskScoreWrapper, get_gym_max_episode_steps
 
+SCORE_REGIONS: Dict[str, List[Dict[str, Tuple[int, int]]]] = {
+    "BeamRider": [
+        dict(x=(5, 20), y=(45, 120)),
+        dict(x=(28, 40), y=(15, 40)),
+    ],
+    "Breakout": [dict(x=(0, 16), y=(35, 80))],
+    "Enduro": [
+        dict(x=(163, 173), y=(55, 110)),
+        dict(x=(177, 188), y=(68, 107)),
+    ],
+    "Pong": [dict(x=(0, 24), y=(0, 160))],
+    "Qbert": [dict(x=(6, 15), y=(33, 71))],
+    "Seaquest": [dict(x=(7, 19), y=(80, 110))],
+    "SpaceInvaders": [dict(x=(10, 20), y=(0, 160))],
+}
 
-def fixed_length_atari(atari_env_id: str) -> gym.Env:
-    """Fixed-length variant of a given Atari environment."""
-    return AutoResetWrapper(gym.make(atari_env_id))
+
+def _get_score_region(atari_env_id: str) -> Optional[List[Dict[str, Tuple[int, int]]]]:
+    """Returns the SCORE_REGIONS entry for a Gym Atari ID, or None if absent."""
+    game = atari_env_id.split("/")[-1].split("-")[0].replace("NoFrameskip", "")
+    return SCORE_REGIONS.get(game)
+
+
+def make_atari_env(atari_env_id: str, masked: bool) -> gym.Env:
+    """Wraps a Gym Atari env in AutoResetWrapper, plus MaskScoreWrapper if `masked`."""
+    wrapped = AutoResetWrapper(gym.make(atari_env_id))
+    if not masked:
+        return wrapped
+
+    # Masking requires a SCORE_REGIONS entry; games without one are rejected.
+    regions = _get_score_region(atari_env_id)
+    if regions is None:
+        raise ValueError(
+            "Requested environment does not yet support masking. "
+            + "See https://github.com/HumanCompatibleAI/seals/issues/61.",
+        )
+    return MaskScoreWrapper(wrapped, regions)
 
 
 def _not_ram_or_det(env_id: str) -> bool:
@@ -37,20 +70,32 @@ def _supported_atari_env(gym_spec: gym.envs.registration.EnvSpec) -> bool:
     )
 
 
-def _seals_name(gym_spec: gym.envs.registration.EnvSpec) -> str:
+def _seals_name(gym_spec: gym.envs.registration.EnvSpec, masked: bool) -> str:
     """Makes a Gym ID for an Atari environment in the seals namespace."""
     slash_separated = gym_spec.id.split("/")
-    return "seals/" + slash_separated[-1]
+    name = "seals/" + slash_separated[-1]
+
+    if not masked:
+        last_hyphen_idx = name.rfind("-")
+        name = name[:last_hyphen_idx] + "-Unmasked" + name[last_hyphen_idx:]
+    return name
 
 
 def register_atari_envs(
     gym_atari_env_specs: Iterable[gym.envs.registration.EnvSpec],
 ) -> None:
-    """Register wrapped gym Atari environments."""
+    """Register masked and unmasked wrapped gym Atari environments."""
     for gym_spec in gym_atari_env_specs:
         gym.register(
-            id=_seals_name(gym_spec),
-            entry_point="seals.atari:fixed_length_atari",
+            id=_seals_name(gym_spec, masked=False),
+            entry_point="seals.atari:make_atari_env",
             max_episode_steps=get_gym_max_episode_steps(gym_spec.id),
-            kwargs=dict(atari_env_id=gym_spec.id),
+            kwargs=dict(atari_env_id=gym_spec.id, masked=False),
         )
+        if _get_score_region(gym_spec.id) is not None:
+            gym.register(
+                id=_seals_name(gym_spec, masked=True),
+                entry_point="seals.atari:make_atari_env",
+                max_episode_steps=get_gym_max_episode_steps(gym_spec.id),
+                kwargs=dict(atari_env_id=gym_spec.id, masked=True),
+            )
diff --git a/src/seals/util.py b/src/seals/util.py
@@ -1,6 +1,6 @@
 """Miscellaneous utilities."""
 
-from typing import Optional, Tuple
+from typing import Dict, List, Optional, Sequence, Tuple, Union
 
 import gym
 import numpy as np
@@ -23,6 +23,52 @@ def step(self, action):
         return obs, rew, False, info
 
 
+class MaskScoreWrapper(gym.Wrapper):
+    """Mask a list of box-shaped regions in the observation to hide reward info.
+
+    Intended for environments whose observations are raw pixels (like Atari
+    environments). Used to mask regions of the observation that include information
+    that could be used to infer the reward, like the game score or enemy ship count.
+    """
+
+    def __init__(
+        self,
+        env: gym.Env,
+        score_regions: List[Dict[str, Tuple[int, int]]],
+        fill_value: Union[float, Sequence[float]] = 0,
+    ):
+        """Builds MaskScoreWrapper.
+
+        Args:
+            env: The environment to wrap.
+            score_regions: A list of box-shaped regions to mask, each denoted by
+                a dictionary `{"x": (x0, x1), "y": (y0, y1)}`, where `x0 < x1`
+                and `y0 < y1`.
+            fill_value: The fill_value for the masked region. By default is black.
+                Can support RGB colors by being a sequence of values [r, g, b].
+        """
+        super().__init__(env)
+        self.fill_value = np.array(fill_value, env.observation_space.dtype)
+
+        self.mask = np.ones(env.observation_space.shape, dtype=bool)
+        for r in score_regions:
+            assert r["x"][0] < r["x"][1] and r["y"][0] < r["y"][1]
+            self.mask[r["x"][0] : r["x"][1], r["y"][0] : r["y"][1]] = 0
+
+    def _mask_obs(self, obs):
+        return np.where(self.mask, obs, self.fill_value)
+
+    def step(self, action):
+        """Returns (obs, rew, done, info) with masked obs."""
+        obs, rew, done, info = self.env.step(action)
+        return self._mask_obs(obs), rew, done, info
+
+    def reset(self, **kwargs):
+        """Returns masked reset observation."""
+        obs = self.env.reset(**kwargs)
+        return self._mask_obs(obs)
+
+
 class ObsCastWrapper(gym.Wrapper):
     """Cast observations to specified dtype.
 

diff --git a/tests/test_envs.py b/tests/test_envs.py
@@ -7,7 +7,7 @@
 import pytest
 
 import seals  # noqa: F401 required for env registration
-from seals.atari import _seals_name
+from seals.atari import _get_score_region, _seals_name
 from seals.testing import envs
 
 ENV_NAMES: List[str] = [
@@ -26,7 +26,11 @@
 ]
 
 ATARI_ENVS: List[str] = [
-    _seals_name(gym_spec) for gym_spec in seals.GYM_ATARI_ENV_SPECS
+    _seals_name(gym_spec, masked=False) for gym_spec in seals.GYM_ATARI_ENV_SPECS
+] + [
+    _seals_name(gym_spec, masked=True)
+    for gym_spec in seals.GYM_ATARI_ENV_SPECS
+    if _get_score_region(gym_spec.id) is not None
 ]
 
 ATARI_V5_ENVS: List[str] = list(filter(lambda name: name.endswith("-v5"), ATARI_ENVS))
@@ -46,14 +50,31 @@ def test_some_atari_envs():
 
 
 def test_atari_space_invaders():
-    """Tests if there's an Atari environment called space invaders."""
-    space_invader_environments = list(
+    """Tests for masked and unmasked Atari space invaders environments."""
+    masked_space_invader_environments = list(
         filter(
-            lambda name: "SpaceInvaders" in name,
+            lambda name: "SpaceInvaders" in name and "Unmasked" not in name,
             ATARI_ENVS,
         ),
     )
-    assert len(space_invader_environments) > 0
+    assert len(masked_space_invader_environments) > 0
+
+    unmasked_space_invader_environments = list(
+        filter(
+            lambda name: "SpaceInvaders" in name and "Unmasked" in name,
+            ATARI_ENVS,
+        ),
+    )
+    assert len(unmasked_space_invader_environments) > 0
+
+
+def test_atari_unmasked_env_naming():
+    """Tests that all unmasked Atari envs have the appropriate name qualifier."""
+    # An env with no score region cannot be masked, so its ID must say "Unmasked".
+    # Collect the offending IDs themselves: a list of per-ID booleans is never empty.
+    unmaskable = [name for name in ATARI_ENVS if _get_score_region(name) is None]
+    noncompliant_envs = [name for name in unmaskable if "Unmasked" not in name]
+    assert len(noncompliant_envs) == 0
 
 
 @pytest.mark.parametrize("env_name", ENV_NAMES)