Merge branch 'zack/mac_install' into 'main'

Update docs for formatting; update formatters to modern packages

I was trying to fix my local formatter setup and that sort of snowballed into cleaning up a bunch of related stuff. Now setting up a local environment should be painless, and I updated black and isort to the latest versions because we were running some really old stuff. The new ones have some nice new features and bugfixes.

- Improved docs for setting up a local environment.
- Removed ray (pretty sure it is unused now) and downgraded openturns by one minor version, which the author said was identical (to get it to be installable on Mac).
- Cleaned up the pyfixfmt lock files and instructions. Public usage is already on the GitHub repo; reorienting the files in our repo to be aimed at internal usage. The lock files were kind of broken, so I just updated everything to the most recent version and pinned those in a proper requirements file.
- Updated pyfixfmt to be compatible with the latest versions of black and isort; the versions we were using were 2+ years old.
- Redid the isort configuration to work with isort 5.0. Now, missing packages will automatically be classified as third-party, which should reduce the dependence on having your environment set up properly. As you can see in the diff, this has already fixed a bunch of stuff.
- Re-formatted the whole codebase with the new versions. The most notable change is that spaces around exponent operations were removed (psf/black#538); everything else is quite minor.
- Removed black and isort from the research Dockerfile; we never use these tools in a container, do we?

Once merged, I'll post in engineering to have everyone update to the new versions.

See merge request generally-intelligent/generally_intelligent!419

Source: fcf664ad3bf53a38bfadf717e0f79c127a92102f
Avalon-Benchmark · Nov 3, 2022 · c4de56a · c4de56a
1 parent 02897c8
commit c4de56a
Show file tree

Hide file tree

Showing 36 changed files with 64 additions and 65 deletions.
diff --git a/avalon/agent/common/envs.py b/avalon/agent/common/envs.py
@@ -307,8 +307,8 @@ def __init__(
         all_actions: bool = False,
     ):
         assert size[0] == size[1]
-        import gym.wrappers
         import gym.envs.atari
+        import gym.wrappers
 
         if name == "james_bond":
             name = "jamesbond"

diff --git a/avalon/agent/dreamer/models.py b/avalon/agent/dreamer/models.py
@@ -402,7 +402,7 @@ def __init__(self, input_channels: int = 3, input_res: int = 96):
         in_channels = input_channels
         current_res = input_res
         for i, kernel in enumerate(kernels):
-            out_channels = 2 ** i * depth
+            out_channels = 2**i * depth
             conv = nn.Conv2d(
                 in_channels=in_channels, out_channels=out_channels, kernel_size=kernel, stride=2, padding=0
             )

diff --git a/avalon/agent/dreamer/truncated_normal.py b/avalon/agent/dreamer/truncated_normal.py
@@ -93,7 +93,7 @@ def auc(self) -> Tensor:
 
     @staticmethod
     def _little_phi(x: Tensor) -> Tensor:
-        return (-(x ** 2) * 0.5).exp() * CONST_INV_SQRT_2PI  # type:  ignore
+        return (-(x**2) * 0.5).exp() * CONST_INV_SQRT_2PI  # type:  ignore
 
     @staticmethod
     def _big_phi(x: Tensor) -> Tensor:
@@ -114,7 +114,7 @@ def icdf(self, value: Tensor) -> Tensor:
     def log_prob(self, value: Tensor) -> Tensor:
         if self._validate_args:
             self._validate_sample(value)
-        return CONST_LOG_INV_SQRT_2PI - self._log_Z - (value ** 2) * 0.5  # type: ignore
+        return CONST_LOG_INV_SQRT_2PI - self._log_Z - (value**2) * 0.5  # type: ignore
 
     def rsample(self, sample_shape: torch.Size = torch.Size()) -> Tensor:
         shape = self._extended_shape(sample_shape)
@@ -151,7 +151,7 @@ def __init__(
         super(TruncatedNormal, self).__init__(a, b, validate_args=validate_args)
         self._log_scale = self.scale.log()
         self._mean = self._mean * self.scale + self.loc
-        self._variance = self._variance * self.scale ** 2
+        self._variance = self._variance * self.scale**2
         self._entropy += self._log_scale
         self._mode = torch.clip(self.loc, self.low, self.high)
 

diff --git a/avalon/agent/torchbeast/atari_wrappers.py b/avalon/agent/torchbeast/atari_wrappers.py
@@ -28,12 +28,11 @@
 
 from collections import deque
 
+import cv2
 import gym
 import numpy as np
 from gym import spaces
 
-import cv2
-
 cv2.ocl.setUseOpenCL(False)
 
 
@@ -49,7 +48,7 @@ def __init__(self, env, noop_max=30):
         assert env.unwrapped.get_action_meanings()[0] == "NOOP"
 
     def reset(self, **kwargs):
-        """ Do no-op action for a number of steps in [1, noop_max]."""
+        """Do no-op action for a number of steps in [1, noop_max]."""
         self.env.reset(**kwargs)
         if self.override_num_noops is not None:
             noops = self.override_num_noops

diff --git a/avalon/agent/torchbeast/core/prof.py b/avalon/agent/torchbeast/core/prof.py
@@ -60,7 +60,7 @@ def vars(self):
         return self._vars
 
     def stds(self):
-        return {k: v ** 0.5 for k, v in self._vars.items()}
+        return {k: v**0.5 for k, v in self._vars.items()}
 
     def summary(self, prefix: str = "") -> str:
         means = self.means()

diff --git a/avalon/agent/torchbeast/evaluation.py b/avalon/agent/torchbeast/evaluation.py
@@ -11,10 +11,10 @@
 from typing import Optional
 from typing import Tuple
 
+import nest
 import sentry_sdk
 import torch
 
-import nest
 from avalon.agent.evaluation import EVAL_TEMP_PATH
 from avalon.agent.evaluation import EvaluationGodotEnv
 from avalon.agent.evaluation import get_wandb_result_key

diff --git a/avalon/agent/torchbeast/monobeast.py b/avalon/agent/torchbeast/monobeast.py
@@ -121,7 +121,7 @@ def get_flags():
 
 
 def compute_baseline_loss(advantages):
-    return 0.5 * torch.sum(advantages ** 2)
+    return 0.5 * torch.sum(advantages**2)
 
 
 def compute_entropy_loss(logits):

diff --git a/avalon/agent/torchbeast/polybeast_env.py b/avalon/agent/torchbeast/polybeast_env.py
@@ -18,12 +18,12 @@
 import multiprocessing as mp
 import time
 
+import libtorchbeast
 import numpy as np
 
 # yapf: disable
 from loguru import logger
 
-import libtorchbeast
 from avalon.agent.torchbeast import atari_wrappers
 from avalon.agent.torchbeast import avalon_helpers
 from avalon.agent.torchbeast.avalon_helpers import create_godot_env

diff --git a/avalon/agent/torchbeast/polybeast_learner.py b/avalon/agent/torchbeast/polybeast_learner.py
@@ -27,6 +27,8 @@
 from typing import Dict
 from typing import Set
 
+import libtorchbeast
+import nest
 import numpy as np
 import sentry_sdk
 import torch
@@ -35,8 +37,6 @@
 from torch import nn
 from torch.nn import functional as F
 
-import libtorchbeast
-import nest
 from avalon.agent.godot.godot_gym import CURRICULUM_BASE_PATH
 from avalon.agent.ppo.observation_model import ImpalaConvNet
 from avalon.agent.torchbeast.avalon_helpers import IS_PROPRIOCEPTION_USED
@@ -155,7 +155,7 @@ def _set_seed(seed):
 
 
 def compute_baseline_loss(advantages):
-    return 0.5 * torch.sum(advantages ** 2)
+    return 0.5 * torch.sum(advantages**2)
 
 
 def compute_entropy_loss(logits):

diff --git a/avalon/agent/train_dreamer_atari.py b/avalon/agent/train_dreamer_atari.py
@@ -7,7 +7,6 @@
 from avalon.agent.dreamer.params import DreamerParams
 from avalon.agent.train_dreamer_dmc import DreamerTrainer
 
-
 """
 - note: the time limit appears to be applied in agent env steps, not (action-repeated) env_steps.
 - the exact model params aren't entirely clear for the atari continuous latents.

diff --git a/avalon/common/test_style.py b/avalon/common/test_style.py
@@ -22,7 +22,7 @@ def test_code_contains_no_print_statement():
     ignore_test_files_and_notebooks = "grep -v -E '(tests?/|/test_|_test.py|.sync.py)'"
     ignore_lines_marked_with_script_comment = 'grep -v " # script"'
     result = subprocess.run(
-        fr'grep -E -r {exclusions} --include="*.py" "(^|\s)print\\(" | {ignore_test_files_and_notebooks} | {ignore_lines_marked_with_script_comment}',
+        rf'grep -E -r {exclusions} --include="*.py" "(^|\s)print\\(" | {ignore_test_files_and_notebooks} | {ignore_lines_marked_with_script_comment}',
         shell=True,
         capture_output=True,
         text=True,

diff --git a/avalon/common/utils.py b/avalon/common/utils.py
@@ -190,7 +190,7 @@ def dir_checksum(dir_path: Path, glob: str = "*") -> str:
     hash_md5 = hashlib.md5()
     for path in sorted(dir_path.glob(glob)):
         with open(path, "rb") as f:
-            for chunk in iter(lambda: f.read(2 ** 20), b""):
+            for chunk in iter(lambda: f.read(2**20), b""):
                 hash_md5.update(chunk)
     return hash_md5.hexdigest()
 

diff --git a/avalon/datagen/godot_env/goals.py b/avalon/datagen/godot_env/goals.py
@@ -152,7 +152,7 @@ class TrainingAvalonGoalEvaluator(AvalonGoalEvaluator):
     def get_level_frame_limit(self, world_params: GenerateAvalonWorldParams) -> int:
         super_frame_limit = super().get_level_frame_limit(world_params)
         dynamic_frame_limit = int(
-            TRAINING_FRAME_LIMITS_AT_ZERO_DIFFICULTY[world_params.task] * 10 ** world_params.difficulty
+            TRAINING_FRAME_LIMITS_AT_ZERO_DIFFICULTY[world_params.task] * 10**world_params.difficulty
         )
         return min(super_frame_limit, dynamic_frame_limit)
 

diff --git a/avalon/datagen/world_creation/entities/doors/hinge_door.py b/avalon/datagen/world_creation/entities/doors/hinge_door.py
@@ -134,7 +134,7 @@ def _get_handle_nodes(self, scene: GodotScene) -> List[GDNode]:
 
         # We cap the handle width to ensure it doesn't prevent the door from opening inwards
         max_width = math.sqrt(
-            (door_width - leeway) ** 2 - handle_thickness ** 2
+            (door_width - leeway) ** 2 - handle_thickness**2
         )  # diagonal of handle can't exceed door width
         handle_width = min(max_width, handle_width)
         handle_size = np.array([handle_width, handle_height, handle_thickness])

diff --git a/avalon/datagen/world_creation/indoor/builders.py b/avalon/datagen/world_creation/indoor/builders.py
@@ -309,7 +309,7 @@ class HouseLikeRoomBuilder(RoomBuilder):
 
     def build(self, story_footprint: np.ndarray, rand: np.random.Generator) -> List[Room]:
         world_length, world_width = story_footprint.shape
-        random_state = np.random.RandomState(seed=rand.integers(0, 2 ** 32 - 1))
+        random_state = np.random.RandomState(seed=rand.integers(0, 2**32 - 1))
 
         # Separate footprint into rectangles as "initial rooms"
         padded_footprint = story_footprint.copy()

diff --git a/avalon/datagen/world_creation/indoor/building.py b/avalon/datagen/world_creation/indoor/building.py
@@ -327,7 +327,7 @@ def export(
                     "transform": make_transform(position=light_centroid),
                     "light_energy": 0.5,
                     "omni_range": max(
-                        math.sqrt(first_story.width ** 2 + first_story.length ** 2) / 2, self.height.size / 2
+                        math.sqrt(first_story.width**2 + first_story.length**2) / 2, self.height.size / 2
                     ),
                 },
             )

diff --git a/avalon/datagen/world_creation/indoor/components.py b/avalon/datagen/world_creation/indoor/components.py
@@ -171,7 +171,7 @@ class Ramp(StoryLink):
     def get_link_length(self, bottom_story: "Story", top_story: "Story") -> float:
         run = self.slope_run
         rise = bottom_story.outer_height + top_story.floor_negative_depth
-        return math.sqrt(run ** 2 + rise ** 2)
+        return math.sqrt(run**2 + rise**2)
 
     @property
     def slope_run(self) -> float:
@@ -206,7 +206,7 @@ def _get_geometry(self, bottom_story: "Story", top_story: "Story") -> Tuple[floa
         slope_width = float(self.width)
         slope_run = float(self.slope_run)
         slope_rise = (bottom_story.outer_height - bottom_story_floor_positive_depth) + top_story_floor_depth
-        slope_length = math.sqrt(slope_run ** 2 + slope_rise ** 2)
+        slope_length = math.sqrt(slope_run**2 + slope_rise**2)
         slope_angle = math.atan(slope_rise / slope_run)
         slope_thickness = SLOPE_THICKNESS
 

diff --git a/avalon/datagen/world_creation/noise.py b/avalon/datagen/world_creation/noise.py
@@ -59,7 +59,7 @@ def lerp(a, b, x):  # type: ignore
 
 def fade(t):  # type: ignore
     "6t^5 - 15t^4 + 10t^3"
-    return 6 * t ** 5 - 15 * t ** 4 + 10 * t ** 3
+    return 6 * t**5 - 15 * t**4 + 10 * t**3
 
 
 def gradient(h, x, y):  # type: ignore

diff --git a/avalon/datagen/world_creation/tasks/avoid.py b/avalon/datagen/world_creation/tasks/avoid.py
@@ -107,7 +107,7 @@ def create_avoid_obstacle(
         difficulty, task_config.safety_radius_easy, task_config.safety_radius_hard
     )
     spawn_dist_sq = world.map.get_dist_sq_to(to_2d_point(locations.spawn))
-    nearby = spawn_dist_sq < spawn_safety_radius ** 2
+    nearby = spawn_dist_sq < spawn_safety_radius**2
     is_detail_important_new = world.is_detail_important.copy()
     is_detail_important_new[nearby] = True
     world = attr.evolve(world, is_detail_important=is_detail_important_new)

diff --git a/avalon/datagen/world_creation/tasks/descend.py b/avalon/datagen/world_creation/tasks/descend.py
@@ -182,7 +182,7 @@ def create_descend_obstacle(
         cliff_edges = np.logical_and(cliff_edges, locations.island)
         cliff_movement_distance = scale_with_difficulty(difficulty, 1.0, 6.0)
         for i in range(2):
-            nearby = world.map.get_dist_sq_to(to_2d_point(new_locations.spawn)) < cliff_movement_distance ** 2
+            nearby = world.map.get_dist_sq_to(to_2d_point(new_locations.spawn)) < cliff_movement_distance**2
             cliff_edges = np.logical_and(cliff_edges, nearby)
             if np.any(cliff_edges):
                 cliff_indices = cast(Tuple[int, int], tuple(rand.choice(np.argwhere(cliff_edges))))

diff --git a/avalon/datagen/world_creation/tasks/survive.py b/avalon/datagen/world_creation/tasks/survive.py
@@ -301,7 +301,7 @@ def add_fruit_tree_and_animals(
         difficulty, task_config.gathering_predator_dist_easy, task_config.gathering_predator_dist_hard
     )
     safe_mask = world.get_safe_mask(
-        sq_distances=sq_distances_from_tree, max_sq_dist=predator_dist ** 2, island_mask=None
+        sq_distances=sq_distances_from_tree, max_sq_dist=predator_dist**2, island_mask=None
     )
     for i in range(predator_count):
         position = world._get_safe_point(rand, safe_mask)
@@ -315,7 +315,7 @@ def add_fruit_tree_and_animals(
     prey_dist = scale_with_difficulty(
         difficulty, task_config.gathering_prey_dist_easy, task_config.gathering_prey_dist_hard
     )
-    safe_mask = world.get_safe_mask(sq_distances=sq_distances_from_tree, max_sq_dist=prey_dist ** 2, island_mask=None)
+    safe_mask = world.get_safe_mask(sq_distances=sq_distances_from_tree, max_sq_dist=prey_dist**2, island_mask=None)
     for i in range(prey_count):
         position = world._get_safe_point(rand, safe_mask)
         if position is None:
@@ -328,7 +328,7 @@ def add_fruit_tree_and_animals(
     tool_dist = scale_with_difficulty(
         difficulty, task_config.gathering_tool_dist_easy, task_config.gathering_tool_dist_hard
     )
-    safe_mask = world.get_safe_mask(sq_distances=sq_distances_from_tree, max_sq_dist=tool_dist ** 2, island_mask=None)
+    safe_mask = world.get_safe_mask(sq_distances=sq_distances_from_tree, max_sq_dist=tool_dist**2, island_mask=None)
     for i in range(tool_count):
         position = world._get_safe_point(rand, safe_mask)
         if position is None:
@@ -342,7 +342,7 @@ def add_fruit_tree_and_animals(
         difficulty, task_config.gathering_forage_food_dist_easy, task_config.gathering_forage_food_dist_hard
     )
     safe_mask = world.get_safe_mask(
-        sq_distances=sq_distances_from_tree, max_sq_dist=forage_food_dist ** 2, island_mask=None
+        sq_distances=sq_distances_from_tree, max_sq_dist=forage_food_dist**2, island_mask=None
     )
     for i in range(forage_food_count):
         position = world._get_safe_point(rand, safe_mask)

diff --git a/avalon/datagen/world_creation/worlds/biome_map.py b/avalon/datagen/world_creation/worlds/biome_map.py
@@ -265,7 +265,7 @@ def create_extra_height_points(
         is_detail_important: MapBoolNP,
     ) -> Point2DListNP:
         meters_per_cell = 1.0 / self.map.cells_per_meter
-        square_meters_per_cell = meters_per_cell ** 2
+        square_meters_per_cell = meters_per_cell**2
         # points_per_cell = round(point_density_in_points_per_square_meter * square_meters_per_cell)
         # if points_per_cell == 0:
         #     points_per_cell = 1
@@ -328,7 +328,7 @@ def rotation_matrix_from_vectors(b: np.ndarray):
     c = np.dot(a, b)
     s = np.linalg.norm(v)
     kmat = np.array([[0, -v[2], v[1]], [v[2], 0, -v[0]], [-v[1], v[0], 0]])
-    rotation_matrix = _IDENTITY_MATRIX + kmat + kmat.dot(kmat) * ((1 - c) / (s ** 2))
+    rotation_matrix = _IDENTITY_MATRIX + kmat + kmat.dot(kmat) * ((1 - c) / (s**2))
     return rotation_matrix
 
 

diff --git a/avalon/datagen/world_creation/worlds/compositional.py b/avalon/datagen/world_creation/worlds/compositional.py
@@ -682,7 +682,7 @@ def create_compositional_task(
 
             # actually boost the spawn
             spawn_radius = 2.0
-            spawn_point_region = world.map.get_dist_sq_to(to_2d_point(solution.locations.spawn)) < spawn_radius ** 2
+            spawn_point_region = world.map.get_dist_sq_to(to_2d_point(solution.locations.spawn)) < spawn_radius**2
             if visibility_height_offset > 0.0:
                 map_new = world.map.copy()
                 map_new.raise_island(spawn_point_region, visibility_height_offset)
@@ -764,7 +764,7 @@ def add_extra_predators(
         max_distance = scale_with_difficulty(
             difficulty, task_config.predator_path_dist_easy, task_config.predator_path_dist_hard
         )
-        max_sq_dist = max_distance ** 2
+        max_sq_dist = max_distance**2
         for i in range(random_predator_count):
             position = world.get_safe_point(
                 rand, sq_distances=sq_path_distances, max_sq_dist=max_sq_dist, island_mask=solution.locations.island
@@ -777,7 +777,7 @@ def add_extra_predators(
         max_distance = scale_with_difficulty(
             difficulty, task_config.weapon_path_dist_easy, task_config.weapon_path_dist_hard
         )
-        max_sq_dist = max_distance ** 2
+        max_sq_dist = max_distance**2
         weapon_classes: List[Type[Weapon]] = []
         for i in range(weapon_count):
             safe_mask = world.get_safe_mask(

diff --git a/avalon/datagen/world_creation/worlds/creation.py b/avalon/datagen/world_creation/worlds/creation.py
@@ -269,7 +269,7 @@ def create_world_for_skill_scenario(
 
                 # also prevent things from being just way too close
                 if hard_min_distance > 0.0:
-                    sq_min_dist = hard_min_distance ** 2
+                    sq_min_dist = hard_min_distance**2
                     food_mask = np.logical_and(food_mask, world.map.get_dist_sq_to(spawn_point_2d) > sq_min_dist)
 
                 possible_food_points = np.stack([world.map.X, world.map.Y], axis=2)[food_mask]

diff --git a/avalon/datagen/world_creation/worlds/difficulty.py b/avalon/datagen/world_creation/worlds/difficulty.py
@@ -31,15 +31,15 @@ def select_categorical_difficulty(
     num_choices = len(choices)
     prob_coeff = 1.0 / sum(difficulty ** float(x) for x in range(num_choices))
     choice_prob = [
-        difficulty / num_choices + (1 - difficulty) * (difficulty ** x) * prob_coeff for x in range(num_choices)
+        difficulty / num_choices + (1 - difficulty) * (difficulty**x) * prob_coeff for x in range(num_choices)
     ]
     choice_idx = rand.choice(range(num_choices), p=choice_prob)
     if _FORCED is not None:
         assert _FORCED in choices
         choice_idx = choices.index(_FORCED)
 
     # TODO use other less arbitrary method to calculate?
-    new_difficulty = difficulty ** 2 + (1 - difficulty) * (difficulty ** (choice_idx + 1))
+    new_difficulty = difficulty**2 + (1 - difficulty) * (difficulty ** (choice_idx + 1))
     return choices[choice_idx], new_difficulty
 
 
@@ -67,7 +67,7 @@ def select_boolean_difficulty(
     if initial_prob == final_prob:
         return sampled_value < initial_prob, difficulty
     assert initial_prob > 0 and final_prob > 0, "Cannot have zero probability for True with log interpolation!"
-    prob = (initial_prob ** (1 - difficulty)) * (final_prob ** difficulty)
+    prob = (initial_prob ** (1 - difficulty)) * (final_prob**difficulty)
     value = sampled_value < prob
     # the updated difficulty is the integral of the probability to d over the integral to 1
     if value:

diff --git a/avalon/datagen/world_creation/worlds/height_map.py b/avalon/datagen/world_creation/worlds/height_map.py
@@ -156,7 +156,7 @@ def add_center_biased_noise(
         noise = rand.normal(scale, scale / 3.0, self.Z.shape)
         max_dist = mountain_radius * self.region.x.size
         dist_sq_to_mountain = self.get_dist_sq_to(np.array(mountain_center))
-        weight = 1.0 - (dist_sq_to_mountain / (max_dist ** 2))
+        weight = 1.0 - (dist_sq_to_mountain / (max_dist**2))
         self.Z = self.Z + np.clip(weight, 0.0, 1.0) * noise
 
     def get_land_mask(self) -> MapBoolNP:
@@ -964,12 +964,12 @@ def get_border_points(data: np.ndarray, rand: np.random.Generator, reduction: fl
     kernel = np.array(
         [
             [0, 1, 0],
-            [16, 0, 16 ** 2],
-            [0, 16 ** 3, 0],
+            [16, 0, 16**2],
+            [0, 16**3, 0],
         ]
     )
     sums = convolve(data, kernel)
-    expected_sums = data + data * 16 + data * 16 ** 2 + data * 16 ** 3
+    expected_sums = data + data * 16 + data * 16**2 + data * 16**3
     border = sums != expected_sums
     # plot_value_grid(border, "BORDER")
     # make it go 3x faster and add a little noise...  we are bad people