diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1414d749..58c744d1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -45,3 +45,13 @@ repos: rev: v1.11.2 hooks: - id: mypy + +- repo: https://github.com/codespell-project/codespell + rev: v2.3.0 + hooks: + - id: codespell + name: codespell + description: Checks for common misspellings in text files. + entry: codespell + language: python + types: [text] diff --git a/docs/api_documentation/core/pbt.md b/docs/api_documentation/core/pbt.md index 6f482d32..82cc1396 100644 --- a/docs/api_documentation/core/pbt.md +++ b/docs/api_documentation/core/pbt.md @@ -2,7 +2,7 @@ [PBT](https://arxiv.org/abs/1711.09846) is optimization method to jointly optimise a population of models and their hyperparameters to maximize performance. -To use PBT in QDax to train SAC, one can use the two following components (see [examples](../../examples/sac_pbt.ipynb) to see how to use the components appropriatly): +To use PBT in QDax to train SAC, one can use the two following components (see [examples](../../examples/sac_pbt.ipynb) to see how to use the components appropriately): ::: qdax.baselines.sac_pbt.PBTSAC diff --git a/docs/overview.md b/docs/overview.md index a5b6def7..00de8b20 100644 --- a/docs/overview.md +++ b/docs/overview.md @@ -1,6 +1,6 @@ # QDax Overview -QDax has been designed to be modular yet flexible so it's easy for anyone to use and extend on the different state-of-the-art QD algortihms available. +QDax has been designed to be modular yet flexible so it's easy for anyone to use and extend on the different state-of-the-art QD algorithms available. For instance, MAP-Elites is designed to work with a few modular and simple components: `container`, `emitter`, and `scoring_function`. ## Key concepts @@ -17,7 +17,7 @@ The `scoring_function` defines the problem/task we want to solve and functions t With this modularity, a user can easily swap out any one of the components and pass it to the `MAPElites` class, avoiding having to re-implement all the steps of the algorithm. Under one layer of abstraction, users have a bit more flexibility. QDax has similarities to the simple and commonly found `ask`/`tell` interface. The `ask` function is similar to the `emit` function in QDax and the `tell` function is similar to the `update` function in QDax. Likewise, the `eval` of solutions is analogous to the `scoring function` in QDax. -More importantly, QDax handles the archive management which is the key idea of QD algorihtms and not present or needed in standard optimization algorihtms or evolutionary strategies. +More importantly, QDax handles the archive management which is the key idea of QD algorithms and not present or needed in standard optimization algorithms or evolutionary strategies. ## Code Example ```python diff --git a/examples/aurora.ipynb b/examples/aurora.ipynb index 55a1db53..d9c01a1a 100644 --- a/examples/aurora.ipynb +++ b/examples/aurora.ipynb @@ -14,7 +14,7 @@ "# Optimizing with AURORA in Jax\n", "\n", "This notebook shows how to use QDax to find diverse and performing controllers in MDPs with [AURORA](https://arxiv.org/pdf/1905.11874.pdf).\n", - "It can be run locally or on Google Colab. We recommand to use a GPU. This notebook will show:\n", + "It can be run locally or on Google Colab. We recommend to use a GPU. 
This notebook will show:\n", "\n", "- how to define the problem\n", "- how to create an emitter\n", @@ -185,7 +185,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Define the fonction to play a step with the policy in the environment\n", + "# Define the function to play a step with the policy in the environment\n", "def play_step_fn(\n", " env_state,\n", " policy_params,\n", @@ -323,7 +323,7 @@ "\n", "@jax.jit\n", "def update_scan_fn(carry: Any, unused: Any) -> Any:\n", - " \"\"\"Scan the udpate function.\"\"\"\n", + " \"\"\"Scan the update function.\"\"\"\n", " (\n", " repertoire,\n", " random_key,\n", diff --git a/examples/cmaes.ipynb b/examples/cmaes.ipynb index a93326ba..6e81b989 100644 --- a/examples/cmaes.ipynb +++ b/examples/cmaes.ipynb @@ -15,7 +15,7 @@ "source": [ "# Optimizing with CMA-ES in Jax\n", "\n", - "This notebook shows how to use QDax to find performing parameters on Rastrigin and Sphere problems with [CMA-ES](https://arxiv.org/pdf/1604.00772.pdf). It can be run locally or on Google Colab. We recommand to use a GPU. This notebook will show:\n", + "This notebook shows how to use QDax to find performing parameters on Rastrigin and Sphere problems with [CMA-ES](https://arxiv.org/pdf/1604.00772.pdf). It can be run locally or on Google Colab. We recommend to use a GPU. This notebook will show:\n", "\n", "- how to define the problem\n", "- how to create a CMA-ES optimizer\n", @@ -216,7 +216,7 @@ " # sample\n", " samples, random_key = cmaes.sample(state, random_key)\n", "\n", - " # udpate\n", + " # update\n", " state = cmaes.update(state, samples)\n", "\n", " # check stop condition\n", diff --git a/examples/cmame.ipynb b/examples/cmame.ipynb index d42dadb8..186c30ee 100644 --- a/examples/cmame.ipynb +++ b/examples/cmame.ipynb @@ -13,7 +13,7 @@ "source": [ "# Optimizing with CMA-ME in Jax\n", "\n", - "This notebook shows how to use QDax to find diverse and performing parameters on Rastrigin or Sphere problem with [CMA-ME](https://arxiv.org/pdf/1912.02400.pdf). It can be run locally or on Google Colab. We recommand to use a GPU. This notebook will show:\n", + "This notebook shows how to use QDax to find diverse and performing parameters on Rastrigin or Sphere problem with [CMA-ME](https://arxiv.org/pdf/1912.02400.pdf). It can be run locally or on Google Colab. We recommend to use a GPU. 
This notebook will show:\n", "\n", "- how to define the problem\n", "- how to create a CMA-ME emitter\n", @@ -207,7 +207,7 @@ "source": [ "random_key = jax.random.key(0)\n", "# in CMA-ME settings (from the paper), there is no init population\n", - "# we multipy by zero to reproduce this setting\n", + "# we multiply by zero to reproduce this setting\n", "initial_population = jax.random.uniform(random_key, shape=(batch_size, num_dimensions)) * 0.\n", "\n", "centroids = compute_euclidean_centroids(\n", @@ -350,7 +350,7 @@ "axes[2].set_title(\"QD Score evolution during training\")\n", "axes[2].set_aspect(0.95 / axes[2].get_data_ratio(), adjustable=\"box\")\n", "\n", - "# udpate this variable to save your results locally\n", + "# update this variable to save your results locally\n", "savefig = False\n", "if savefig:\n", " figname = \"cma_me_\" + optim_problem + \"_\" + str(num_dimensions) + \"_\" + emitter_type + \".png\"\n", diff --git a/examples/cmamega.ipynb b/examples/cmamega.ipynb index 1a8eeafb..8674a0ef 100644 --- a/examples/cmamega.ipynb +++ b/examples/cmamega.ipynb @@ -13,7 +13,7 @@ "source": [ "# Optimizing with CMA-MEGA in Jax\n", "\n", - "This notebook shows how to use QDax to find diverse and performing parameters on the Rastrigin problem with [CMA-MEGA](https://arxiv.org/pdf/2106.03894.pdf). It can be run locally or on Google Colab. We recommand to use a GPU. This notebook will show:\n", + "This notebook shows how to use QDax to find diverse and performing parameters on the Rastrigin problem with [CMA-MEGA](https://arxiv.org/pdf/2106.03894.pdf). It can be run locally or on Google Colab. We recommend to use a GPU. This notebook will show:\n", "\n", "- how to define the problem\n", "- how to create a cma-mega emitter\n", diff --git a/examples/dads.ipynb b/examples/dads.ipynb index 50d99d56..f7b417ae 100644 --- a/examples/dads.ipynb +++ b/examples/dads.ipynb @@ -13,7 +13,7 @@ "source": [ "# Training DADS with Jax\n", "\n", - "This notebook shows how to use QDax to train [DADS](https://arxiv.org/abs/1907.01657) on a Brax environment. It can be run locally or on Google Colab. We recommand to use a GPU. This notebook will show:\n", + "This notebook shows how to use QDax to train [DADS](https://arxiv.org/abs/1907.01657) on a Brax environment. It can be run locally or on Google Colab. We recommend to use a GPU. This notebook will show:\n", "- how to define an environment\n", "- how to define a replay buffer\n", "- how to create a dads instance\n", @@ -88,7 +88,7 @@ "\n", "Most hyperparameters are similar to those introduced in [SAC paper](https://arxiv.org/abs/1801.01290), [DIAYN paper](https://arxiv.org/abs/1802.06070) and [DADS paper](https://arxiv.org/abs/1907.01657).\n", "\n", - "The parameter `descriptor_full_state` is less straightforward, it concerns the information used for diversity seeking and dynamics. In DADS, one can use the full state for diversity seeking, but one can also use a prior to focus on an interesting aspect of the state. Actually, priors are often used in experiments, for instance, focusing on the x/y position rather than the full position. When `descriptor_full_state` is set to True, it uses the full state, when it is set to False, it uses the 'state descriptor' retrieved by the environment. Hence, it is required that the environment has one. (All the `_uni`, `_omni` do, same for `anttrap`, `antmaze` and `pointmaze`.) In the future, we will add an option to use a prior function direclty on the full state." 
+ "The parameter `descriptor_full_state` is less straightforward, it concerns the information used for diversity seeking and dynamics. In DADS, one can use the full state for diversity seeking, but one can also use a prior to focus on an interesting aspect of the state. Actually, priors are often used in experiments, for instance, focusing on the x/y position rather than the full position. When `descriptor_full_state` is set to True, it uses the full state, when it is set to False, it uses the 'state descriptor' retrieved by the environment. Hence, it is required that the environment has one. (All the `_uni`, `_omni` do, same for `anttrap`, `antmaze` and `pointmaze`.) In the future, we will add an option to use a prior function directly on the full state." ] }, { @@ -258,7 +258,7 @@ " deterministic=True,\n", " env=eval_env,\n", " skills=skills,\n", - " evaluation=True, # needed by normalizatoin mecanism\n", + " evaluation=True, # needed by normalizatoin mechanism\n", ")\n", "\n", "play_step = functools.partial(\n", @@ -308,7 +308,7 @@ "source": [ "## Prepare last utils for the training loop\n", "\n", - "Many Reinforcement Learning algorithm have similar training process, that can be divided in a precise training step that is repeted several times. Most of the differences are captured inside the `play_step` and in the `update` functions. Hence, once those are defined, the iteration works in the same way. For this reason, instead of coding the same function for each algorithm, we have created the `do_iteration_fn` that can be used by most of them. In the training script, the user just has to partial the function to give `play_step`, `update` plus a few other parameter." + "Many Reinforcement Learning algorithm have similar training process, that can be divided in a precise training step that is repeated several times. Most of the differences are captured inside the `play_step` and in the `update` functions. Hence, once those are defined, the iteration works in the same way. For this reason, instead of coding the same function for each algorithm, we have created the `do_iteration_fn` that can be used by most of them. In the training script, the user just has to partial the function to give `play_step`, `update` plus a few other parameter." ] }, { diff --git a/examples/dcrlme.ipynb b/examples/dcrlme.ipynb index 057ef0c4..ff09dc5f 100644 --- a/examples/dcrlme.ipynb +++ b/examples/dcrlme.ipynb @@ -15,7 +15,7 @@ "\n", "This notebook shows how to use QDax to find diverse and performing controllers in MDPs with [Descriptor-Conditioned Reinforcement Learning MAP-Elites (DCRL-ME)](https://arxiv.org/abs/2401.08632).\n", "This algorithm extends and improves upon [Descriptor-Conditioned Gradients MAP-Elites (DCG-ME)](https://dl.acm.org/doi/abs/10.1145/3583131.3590503)\n", - "It can be run locally or on Google Colab. We recommand to use a GPU. This notebook will show:\n", + "It can be run locally or on Google Colab. We recommend to use a GPU. 
This notebook will show:\n", "\n", "- how to define the problem\n", "- how to create the DCRL emitter\n", @@ -200,7 +200,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Define the fonction to play a step with the policy in the environment\n", + "# Define the function to play a step with the policy in the environment\n", "def play_step_fn(\n", " env_state: EnvState, policy_params: Params, random_key: RNGKey\n", ") -> Tuple[EnvState, Params, RNGKey, DCRLTransition]:\n", diff --git a/examples/diayn.ipynb b/examples/diayn.ipynb index d13ccad7..b58a0af0 100644 --- a/examples/diayn.ipynb +++ b/examples/diayn.ipynb @@ -13,7 +13,7 @@ "source": [ "# Training DIAYN with Jax\n", "\n", - "This notebook shows how to use QDax to train [DIAYN](https://arxiv.org/abs/1802.06070) on a Brax environment. It can be run locally or on Google Colab. We recommand to use a GPU. This notebook will show:\n", + "This notebook shows how to use QDax to train [DIAYN](https://arxiv.org/abs/1802.06070) on a Brax environment. It can be run locally or on Google Colab. We recommend to use a GPU. This notebook will show:\n", "- how to define an environment\n", "- how to define a replay buffer\n", "- how to create a diayn instance\n", @@ -89,7 +89,7 @@ "\n", "Most hyperparameters are similar to those introduced in [SAC paper](https://arxiv.org/abs/1801.01290) and [DIAYN paper](https://arxiv.org/abs/1802.06070).\n", "\n", - "The parameter `descriptor_full_state` is less straightforward, it concerns the information used for diversity seeking and discrimination. In DIAYN, one can use the full state for diversity seeking, but one can also use a prior to focus on an interesting aspect of the state. Actually, priors are often used in experiments, for instance, focusing on the x/y position rather than the full position. When `descriptor_full_state` is set to True, it uses the full state, when it is set to False, it uses the 'state descriptor' retrieved by the environment. Hence, it is required that the environment has one. (All the `_uni`, `_omni` do, same for `anttrap`, `antmaze` and `pointmaze`.) In the future, we will add an option to use a prior function direclty on the full state." + "The parameter `descriptor_full_state` is less straightforward, it concerns the information used for diversity seeking and discrimination. In DIAYN, one can use the full state for diversity seeking, but one can also use a prior to focus on an interesting aspect of the state. Actually, priors are often used in experiments, for instance, focusing on the x/y position rather than the full position. When `descriptor_full_state` is set to True, it uses the full state, when it is set to False, it uses the 'state descriptor' retrieved by the environment. Hence, it is required that the environment has one. (All the `_uni`, `_omni` do, same for `anttrap`, `antmaze` and `pointmaze`.) In the future, we will add an option to use a prior function directly on the full state." ] }, { @@ -299,7 +299,7 @@ "source": [ "## Prepare last utils for the training loop\n", "\n", - "Many Reinforcement Learning algorithm have similar training process, that can be divided in a precise training step that is repeted several times. Most of the differences are captured inside the `play_step` and in the `update` functions. Hence, once those are defined, the iteration works in the same way. For this reason, instead of coding the same function for each algorithm, we have created the `do_iteration_fn` that can be used by most of them. 
In the training script, the user just has to partial the function to give `play_step`, `update` plus a few other parameter." + "Many Reinforcement Learning algorithm have similar training process, that can be divided in a precise training step that is repeated several times. Most of the differences are captured inside the `play_step` and in the `update` functions. Hence, once those are defined, the iteration works in the same way. For this reason, instead of coding the same function for each algorithm, we have created the `do_iteration_fn` that can be used by most of them. In the training script, the user just has to partial the function to give `play_step`, `update` plus a few other parameter." ] }, { diff --git a/examples/distributed_mapelites.ipynb b/examples/distributed_mapelites.ipynb index d2b158da..517b55db 100644 --- a/examples/distributed_mapelites.ipynb +++ b/examples/distributed_mapelites.ipynb @@ -14,7 +14,7 @@ "# Optimizing with MAP-Elites in Jax (multi-devices example)\n", "\n", "This notebook shows how to use QDax to find diverse and performing controllers in MDPs with [MAP-Elites](https://arxiv.org/abs/1504.04909).\n", - "It can be run locally or on Google Colab. We recommand to use a GPU. This notebook will show:\n", + "It can be run locally or on Google Colab. We recommend to use a GPU. This notebook will show:\n", "\n", "- how to define the problem\n", "- how to create an emitter\n", @@ -215,7 +215,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Define the fonction to play a step with the policy in the environment\n", + "# Define the function to play a step with the policy in the environment\n", "def play_step_fn(\n", " env_state,\n", " policy_params,\n", diff --git a/examples/mapelites.ipynb b/examples/mapelites.ipynb index 626fb5de..b6b4652a 100644 --- a/examples/mapelites.ipynb +++ b/examples/mapelites.ipynb @@ -14,7 +14,7 @@ "# Optimizing with MAP-Elites in Jax\n", "\n", "This notebook shows how to use QDax to find diverse and performing controllers in MDPs with [MAP-Elites](https://arxiv.org/abs/1504.04909).\n", - "It can be run locally or on Google Colab. We recommand to use a GPU. This notebook will show:\n", + "It can be run locally or on Google Colab. We recommend to use a GPU. This notebook will show:\n", "\n", "- how to define the problem\n", "- how to create an emitter\n", @@ -172,7 +172,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Define the fonction to play a step with the policy in the environment\n", + "# Define the function to play a step with the policy in the environment\n", "def play_step_fn(\n", " env_state,\n", " policy_params,\n", diff --git a/examples/mees.ipynb b/examples/mees.ipynb index c09c7132..765a5986 100644 --- a/examples/mees.ipynb +++ b/examples/mees.ipynb @@ -18,7 +18,7 @@ "# Optimizing with MEES in Jax\n", "\n", "This notebook shows how to use QDax to find diverse and performing controllers with MAP-Elites-ES introduced in [Scaling MAP-Elites to Deep Neuroevolution](https://dl.acm.org/doi/pdf/10.1145/3377930.3390217).\n", - "It can be run locally or on Google Colab. We recommand to use a GPU. This notebook will show:\n", + "It can be run locally or on Google Colab. We recommend to use a GPU. 
This notebook will show:\n", "\n", "- how to define the problem\n", "- how to create the MEES emitter\n", @@ -189,7 +189,7 @@ }, "outputs": [], "source": [ - "# Define the fonction to play a step with the policy in the environment\n", + "# Define the function to play a step with the policy in the environment\n", "def play_step_fn(\n", " env_state,\n", " policy_params,\n", @@ -247,7 +247,7 @@ " behavior_descriptor_extractor=bd_extraction_fn,\n", ")\n", "\n", - "# Prepare the scoring functions for the offspring generated folllowing\n", + "# Prepare the scoring functions for the offspring generated following\n", "# the approximated gradient (each of them is evaluated 30 times)\n", "sampling_fn = functools.partial(\n", " sampling,\n", diff --git a/examples/mome.ipynb b/examples/mome.ipynb index 217f94be..8840b9e8 100644 --- a/examples/mome.ipynb +++ b/examples/mome.ipynb @@ -15,7 +15,7 @@ "source": [ "# Optimizing multiple objectives with MOME in Jax\n", "\n", - "This notebook shows how to use QDax to find diverse and performing parameters on a multi-objectives Rastrigin problem, using [Multi-Objective MAP-Elites](https://arxiv.org/pdf/2202.03057.pdf) (MOME) algorithm. It can be run locally or on Google Colab. We recommand to use a GPU. This notebook will show:\n", + "This notebook shows how to use QDax to find diverse and performing parameters on a multi-objectives Rastrigin problem, using [Multi-Objective MAP-Elites](https://arxiv.org/pdf/2202.03057.pdf) (MOME) algorithm. It can be run locally or on Google Colab. We recommend to use a GPU. This notebook will show:\n", "\n", "- how to define the problem\n", "- how to create an emitter instance\n", diff --git a/examples/nsga2_spea2.ipynb b/examples/nsga2_spea2.ipynb index 2d157323..a7347130 100644 --- a/examples/nsga2_spea2.ipynb +++ b/examples/nsga2_spea2.ipynb @@ -13,7 +13,7 @@ "source": [ "# Optimizing multiple objectives with NSGA2 & SPEA2 in Jax\n", "\n", - "This notebook shows how to use QDax to find diverse and performing parameters on a multi-objectives Rastrigin problem, using [NSGA2](https://ieeexplore.ieee.org/document/996017) and [SPEA2](https://www.semanticscholar.org/paper/SPEA2%3A-Improving-the-strength-pareto-evolutionary-Zitzler-Laumanns/b13724cb54ae4171916f3f969d304b9e9752a57f) algorithms. It can be run locally or on Google Colab. We recommand to use a GPU. This notebook will show:\n", + "This notebook shows how to use QDax to find diverse and performing parameters on a multi-objectives Rastrigin problem, using [NSGA2](https://ieeexplore.ieee.org/document/996017) and [SPEA2](https://www.semanticscholar.org/paper/SPEA2%3A-Improving-the-strength-pareto-evolutionary-Zitzler-Laumanns/b13724cb54ae4171916f3f969d304b9e9752a57f) algorithms. It can be run locally or on Google Colab. We recommend to use a GPU. This notebook will show:\n", "\n", "- how to define the problem\n", "- how to create an emitter instance\n", diff --git a/examples/omgmega.ipynb b/examples/omgmega.ipynb index 900fc812..5f3c69eb 100644 --- a/examples/omgmega.ipynb +++ b/examples/omgmega.ipynb @@ -14,7 +14,7 @@ "# Optimizing with OMG-MEGA in Jax\n", "\n", "This notebook shows how to use QDax to find diverse and performing parameters on the Rastrigin problem with [OMG-MEGA](https://arxiv.org/pdf/2106.03894.pdf).\n", - "It can be run locally or on Google Colab. We recommand to use a GPU. This notebook will show:\n", + "It can be run locally or on Google Colab. We recommend to use a GPU. 
This notebook will show:\n", "\n", "- how to define the problem\n", "- how to create an omg-mega emitter\n", diff --git a/examples/pga_aurora.ipynb b/examples/pga_aurora.ipynb index c3c00ae5..330e82ed 100644 --- a/examples/pga_aurora.ipynb +++ b/examples/pga_aurora.ipynb @@ -14,7 +14,7 @@ "# Optimizing with PGA-AURORA in Jax\n", "\n", "This notebook shows how to use QDax to find diverse and performing controllers in MDPs with [PGA-AURORA](https://arxiv.org/abs/2210.03516).\n", - "It can be run locally or on Google Colab. We recommand to use a GPU. This notebook will show:\n", + "It can be run locally or on Google Colab. We recommend to use a GPU. This notebook will show:\n", "\n", "- how to define the problem\n", "- how to create an emitter\n", @@ -203,7 +203,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Define the fonction to play a step with the policy in the environment\n", + "# Define the function to play a step with the policy in the environment\n", "def play_step_fn(\n", " env_state,\n", " policy_params,\n", @@ -369,7 +369,7 @@ "\n", "@jax.jit\n", "def update_scan_fn(carry: Any, unused: Any) -> Any:\n", - " \"\"\"Scan the udpate function.\"\"\"\n", + " \"\"\"Scan the update function.\"\"\"\n", " (\n", " repertoire,\n", " emitter_state,\n", diff --git a/examples/pgame.ipynb b/examples/pgame.ipynb index c5419a3f..bd83b0e8 100644 --- a/examples/pgame.ipynb +++ b/examples/pgame.ipynb @@ -14,7 +14,7 @@ "# Optimizing with PGAME in Jax\n", "\n", "This notebook shows how to use QDax to find diverse and performing controllers in MDPs with [Policy Gradient Assisted MAP-Elites](https://hal.archives-ouvertes.fr/hal-03135723v2/file/PGA_MAP_Elites_GECCO.pdf).\n", - "It can be run locally or on Google Colab. We recommand to use a GPU. This notebook will show:\n", + "It can be run locally or on Google Colab. We recommend to use a GPU. This notebook will show:\n", "\n", "- how to define the problem\n", "- how to create the PGAME emitter\n", @@ -179,7 +179,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Define the fonction to play a step with the policy in the environment\n", + "# Define the function to play a step with the policy in the environment\n", "def play_step_fn(\n", " env_state,\n", " policy_params,\n", diff --git a/examples/qdpg.ipynb b/examples/qdpg.ipynb index a30c3be3..b2e68e35 100644 --- a/examples/qdpg.ipynb +++ b/examples/qdpg.ipynb @@ -14,7 +14,7 @@ "# Optimizing with QDPG in Jax\n", "\n", "This notebook shows how to use QDax to find diverse and performing controllers in MDPs with [QDPG - Quality Diversity Policy Gradient in MAP-Elites](https://arxiv.org/abs/2006.08505).\n", - "It can be run locally or on Google Colab. We recommand to use a GPU. This notebook will show:\n", + "It can be run locally or on Google Colab. We recommend to use a GPU. 
This notebook will show:\n", "\n", "- how to define the problem\n", "- how to create the QDPG emitter\n", @@ -193,7 +193,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Define the fonction to play a step with the policy in the environment\n", + "# Define the function to play a step with the policy in the environment\n", "def play_step_fn(\n", " env_state,\n", " policy_params,\n", diff --git a/examples/sac_pbt.ipynb b/examples/sac_pbt.ipynb index 53b526db..b4847bd8 100644 --- a/examples/sac_pbt.ipynb +++ b/examples/sac_pbt.ipynb @@ -292,7 +292,7 @@ }, "outputs": [], "source": [ - "# get eval policy fonction\n", + "# get eval policy function\n", "eval_policy = jax.pmap(agent.get_eval_fn(eval_env), axis_name=\"p\", devices=devices)" ] }, diff --git a/examples/smerl.ipynb b/examples/smerl.ipynb index ede905f9..08ee56e6 100644 --- a/examples/smerl.ipynb +++ b/examples/smerl.ipynb @@ -13,7 +13,7 @@ "source": [ "# Training DIAYN SMERL with Jax\n", "\n", - "This notebook shows how to use QDax to train DIAYN SMERL on a Brax environment. It can be run locally or on Google Colab. We recommand to use a GPU. This notebook will show:\n", + "This notebook shows how to use QDax to train DIAYN SMERL on a Brax environment. It can be run locally or on Google Colab. We recommend to use a GPU. This notebook will show:\n", "- how to define an environment\n", "- how to define a replay buffer\n", "- how to create a diayn smerl instance\n", @@ -89,7 +89,7 @@ "\n", "Most hyperparameters are similar to those introduced in [SAC paper](https://arxiv.org/abs/1801.01290), [DIAYN paper](https://arxiv.org/abs/1802.06070) and [SMERL paper](https://arxiv.org/pdf/2010.14484.pdf).\n", "\n", - "The parameter `descriptor_full_state` is less straightforward, it concerns the information used for diversity seeking and discrimination. In DIAYN, one can use the full state for diversity seeking, but one can also use a prior to focus on an interesting aspect of the state. Actually, priors are often used in experiments, for instance, focusing on the x/y position rather than the full position. When `descriptor_full_state` is set to True, it uses the full state, when it is set to False, it uses the 'state descriptor' retrieved by the environment. Hence, it is required that the environment has one. (All the `_uni`, `_omni` do, same for `anttrap`, `antmaze` and `pointmaze`.) In the future, we will add an option to use a prior function direclty on the full state." + "The parameter `descriptor_full_state` is less straightforward, it concerns the information used for diversity seeking and discrimination. In DIAYN, one can use the full state for diversity seeking, but one can also use a prior to focus on an interesting aspect of the state. Actually, priors are often used in experiments, for instance, focusing on the x/y position rather than the full position. When `descriptor_full_state` is set to True, it uses the full state, when it is set to False, it uses the 'state descriptor' retrieved by the environment. Hence, it is required that the environment has one. (All the `_uni`, `_omni` do, same for `anttrap`, `antmaze` and `pointmaze`.) In the future, we will add an option to use a prior function directly on the full state." 
] }, { diff --git a/examples/td3_pbt.ipynb b/examples/td3_pbt.ipynb index 3bbf237e..90254907 100644 --- a/examples/td3_pbt.ipynb +++ b/examples/td3_pbt.ipynb @@ -252,7 +252,7 @@ }, "outputs": [], "source": [ - "# get eval policy fonction\n", + "# get eval policy function\n", "eval_policy = jax.pmap(agent.get_eval_fn(eval_env), axis_name=\"p\", devices=devices)" ] }, diff --git a/qdax/baselines/cmaes.py b/qdax/baselines/cmaes.py index 0e9b4084..ffe2c811 100644 --- a/qdax/baselines/cmaes.py +++ b/qdax/baselines/cmaes.py @@ -262,7 +262,7 @@ def update_eigen( # unpack data cov, num_updates = operand - # enfore symmetry - did not change anything + # enforce symmetry - did not change anything cov = jnp.triu(cov) + jnp.triu(cov, 1).T # get eigen decomposition: eigenvalues, eigenvectors diff --git a/qdax/baselines/dads.py b/qdax/baselines/dads.py index bd4f4534..be79286f 100644 --- a/qdax/baselines/dads.py +++ b/qdax/baselines/dads.py @@ -68,7 +68,7 @@ class DADS(SAC): of skills, is used to evaluate the skills in the environment and hence to generate transitions. The sampling is hence fixed and perfectly uniform. - We plan to add continous skill as an option in the future. We also plan + We plan to add continuous skill as an option in the future. We also plan to release the current constraint on the number of batched environments by sampling from the skills rather than having this fixed setting. """ @@ -499,7 +499,7 @@ def _update_networks( (training_state, transitions), ) - # udpate alpha + # update alpha ( alpha_params, alpha_optimizer_state, diff --git a/qdax/baselines/diayn.py b/qdax/baselines/diayn.py index 0ebdfc32..5f0d9a73 100644 --- a/qdax/baselines/diayn.py +++ b/qdax/baselines/diayn.py @@ -64,7 +64,7 @@ class DIAYN(SAC): Since we are using categorical skills, the current loss function used to train the discriminator is the categorical cross entropy loss. - We plan to add continous skill as an option in the future. We also plan + We plan to add continuous skill as an option in the future. We also plan to release the current constraint on the number of batched environments by sampling from the skills rather than having this fixed setting. """ @@ -408,7 +408,7 @@ def _update_networks( training_state.discriminator_params, discriminator_updates ) - # udpate alpha + # update alpha ( alpha_params, alpha_optimizer_state, diff --git a/qdax/baselines/sac.py b/qdax/baselines/sac.py index 482c5715..793cdd3f 100644 --- a/qdax/baselines/sac.py +++ b/qdax/baselines/sac.py @@ -162,7 +162,7 @@ def select_action( random_key: RNGKey, deterministic: bool = False, ) -> Tuple[Action, RNGKey]: - """Selects an action acording to SAC policy. + """Selects an action according to SAC policy. Args: obs: agent observation(s) diff --git a/qdax/core/containers/archive.py b/qdax/core/containers/archive.py index 036c5892..d2e1f812 100644 --- a/qdax/core/containers/archive.py +++ b/qdax/core/containers/archive.py @@ -15,9 +15,9 @@ class Archive(PyTreeNode): An example of use of the archive is the algorithm QDPG: state descriptors are stored in this archive and a novelty scorer compares - new state desciptors to the state descriptors stored in this archive. + new state descriptors to the state descriptors stored in this archive. - Note: notations suppose that the elements are called state desciptors. + Note: notations suppose that the elements are called state descriptors. If we where to use this structure for another application, it would be better to change the variables name for another one. 
Does not seem necessary at the moment though. @@ -157,7 +157,7 @@ def insert(self, state_descriptors: jnp.ndarray) -> Archive: """ state_descriptors = state_descriptors.reshape((-1, state_descriptors.shape[-1])) - # get nearest neigbor for each new state descriptor + # get nearest neighbor for each new state descriptor values, _indices = knn(self.data, state_descriptors, 1) # get indices where distance bigger than threshold @@ -185,7 +185,7 @@ def iterate_fn( state_descriptor = condition_data["state_descriptor"] # do the filtering among the added elements - # get nearest neigbor for each new state descriptor + # get nearest neighbor for each new state descriptor values, _indices = knn(new_elements, state_descriptor.reshape(1, -1), 1) # get indices where distance bigger than threshold @@ -253,7 +253,7 @@ def score_euclidean_novelty( def knn( data: jnp.ndarray, new_data: jnp.ndarray, k: jnp.ndarray ) -> Tuple[jnp.ndarray, jnp.ndarray]: - """K nearest neigbors - Brute force implementation. + """K nearest neighbors - Brute force implementation. Using euclidean distance. Code from https://www.kernel-operations.io/keops/_auto_benchmarks/ @@ -262,7 +262,7 @@ def knn( Args: data: given reference data. new_data: data to be compared to the reference data. - k: number of neigbors to consider. + k: number of neighbors to consider. Returns: The distances and indices of the nearest neighbors. diff --git a/qdax/core/containers/mapelites_repertoire.py b/qdax/core/containers/mapelites_repertoire.py index 87584eb3..2556470b 100644 --- a/qdax/core/containers/mapelites_repertoire.py +++ b/qdax/core/containers/mapelites_repertoire.py @@ -372,7 +372,7 @@ def init( fitnesses: fitness of the initial genotypes of shape (batch_size,) descriptors: descriptors of the initial genotypes of shape (batch_size, num_descriptors) - centroids: tesselation centroids of shape (batch_size, num_descriptors) + centroids: tessellation centroids of shape (batch_size, num_descriptors) extra_scores: unused extra_scores of the initial genotypes Returns: diff --git a/qdax/core/containers/repertoire.py b/qdax/core/containers/repertoire.py index 77c91683..24c9fbf9 100644 --- a/qdax/core/containers/repertoire.py +++ b/qdax/core/containers/repertoire.py @@ -48,6 +48,6 @@ def add(self) -> Repertoire: repertoire. Returns: - The udpated repertoire. + The updated repertoire. """ pass diff --git a/qdax/core/containers/spea2_repertoire.py b/qdax/core/containers/spea2_repertoire.py index 33c31547..e93fba85 100644 --- a/qdax/core/containers/spea2_repertoire.py +++ b/qdax/core/containers/spea2_repertoire.py @@ -60,7 +60,7 @@ def add( """Updates the population with the new solutions. To decide which individuals to keep, we count, for each solution, - the number of solutions by which tey are dominated. We keep only + the number of solutions by which they are dominated. We keep only the solutions that are the less dominated ones. Args: diff --git a/qdax/core/containers/unstructured_repertoire.py b/qdax/core/containers/unstructured_repertoire.py index 4a1c0cdb..32ac5689 100644 --- a/qdax/core/containers/unstructured_repertoire.py +++ b/qdax/core/containers/unstructured_repertoire.py @@ -144,7 +144,7 @@ class UnstructuredRepertoire(flax.struct.PyTreeNode): descriptors: an array that contains the descriptors of solutions in each cell of the repertoire, ordered by centroids. The array shape is (num_centroids, num_descriptors). - centroids: an array the contains the centroids of the tesselation. 
The array + centroids: an array the contains the centroids of the tessellation. The array shape is (num_centroids, num_descriptors). observations: observations that the genotype gathered in the environment. """ diff --git a/qdax/core/distributed_map_elites.py b/qdax/core/distributed_map_elites.py index dbc6522b..e8549005 100644 --- a/qdax/core/distributed_map_elites.py +++ b/qdax/core/distributed_map_elites.py @@ -164,7 +164,8 @@ def get_distributed_init_fn( devices: hardware devices. Returns: - A callable function that inits the MAP-Elites algorithm in a ditributed way. + A callable function that inits the MAP-Elites algorithm in a distributed + way. """ return jax.pmap( # type: ignore partial(self.init, centroids=centroids), diff --git a/qdax/core/emitters/cma_emitter.py b/qdax/core/emitters/cma_emitter.py index e3b476dd..9ac4eda1 100644 --- a/qdax/core/emitters/cma_emitter.py +++ b/qdax/core/emitters/cma_emitter.py @@ -33,7 +33,7 @@ class CMAEmitterState(EmitterState): subject to refactoring discussions in the future. cmaes_state: state of the underlying CMA-ES algorithm previous_fitnesses: store last fitnesses of the repertoire. Used to - compute the improvment. + compute the improvement. emit_count: count the number of emission events. """ @@ -380,7 +380,7 @@ def _ranking_criteria( fitnesses: corresponding fitnesses. descriptors: corresponding fitnesses. extra_scores: corresponding extra scores. - improvements: improvments of the emitted genotypes. This corresponds + improvements: improvements of the emitted genotypes. This corresponds to the difference between their fitness and the fitness of the individual occupying the cell of corresponding fitness. diff --git a/qdax/core/emitters/cma_improvement_emitter.py b/qdax/core/emitters/cma_improvement_emitter.py index 7c3fc98c..fd84bf17 100644 --- a/qdax/core/emitters/cma_improvement_emitter.py +++ b/qdax/core/emitters/cma_improvement_emitter.py @@ -47,7 +47,7 @@ def _ranking_criteria( fitnesses: corresponding fitnesses. descriptors: corresponding fitnesses. extra_scores: corresponding extra scores. - improvements: improvments of the emitted genotypes. This corresponds + improvements: improvements of the emitted genotypes. This corresponds to the difference between their fitness and the fitness of the individual occupying the cell of corresponding fitness. diff --git a/qdax/core/emitters/cma_mega_emitter.py b/qdax/core/emitters/cma_mega_emitter.py index 976f528b..b4e83b96 100644 --- a/qdax/core/emitters/cma_mega_emitter.py +++ b/qdax/core/emitters/cma_mega_emitter.py @@ -35,7 +35,7 @@ class CMAMEGAState(EmitterState): subject to refactoring discussions in the future. cmaes_state: state of the underlying CMA-ES algorithm previous_fitnesses: store last fitnesses of the repertoire. Used to - compute the improvment. + compute the improvement. """ theta: Genotype @@ -62,7 +62,7 @@ def __init__( Fontaine et al. Args: - scoring_function: a function to score individuals, outputing fitness, + scoring_function: a function to score individuals, outputting fitness, descriptors and extra scores. With this emitter, the extra score contains gradients and normalized gradients. batch_size: number of solutions sampled at each iteration diff --git a/qdax/core/emitters/cma_opt_emitter.py b/qdax/core/emitters/cma_opt_emitter.py index 9a783585..2e7e8bbc 100644 --- a/qdax/core/emitters/cma_opt_emitter.py +++ b/qdax/core/emitters/cma_opt_emitter.py @@ -31,7 +31,7 @@ def _ranking_criteria( fitnesses: corresponding fitnesses. descriptors: corresponding fitnesses. 
extra_scores: corresponding extra scores. - improvements: improvments of the emitted genotypes. This corresponds + improvements: improvements of the emitted genotypes. This corresponds to the difference between their fitness and the fitness of the individual occupying the cell of corresponding fitness. diff --git a/qdax/core/emitters/cma_rnd_emitter.py b/qdax/core/emitters/cma_rnd_emitter.py index 0715c437..c015922c 100644 --- a/qdax/core/emitters/cma_rnd_emitter.py +++ b/qdax/core/emitters/cma_rnd_emitter.py @@ -23,7 +23,7 @@ class CMARndEmitterState(CMAEmitterState): subject to refactoring discussions in the future. cmaes_state: state of the underlying CMA-ES algorithm previous_fitnesses: store last fitnesses of the repertoire. Used to - compute the improvment. + compute the improvement. emit_count: count the number of emission events. random_direction: direction of the behavior space we are trying to explore. @@ -148,7 +148,7 @@ def _ranking_criteria( fitnesses: corresponding fitnesses. descriptors: corresponding fitnesses. extra_scores: corresponding extra scores. - improvements: improvments of the emitted genotypes. This corresponds + improvements: improvements of the emitted genotypes. This corresponds to the difference between their fitness and the fitness of the individual occupying the cell of corresponding fitness. diff --git a/qdax/core/emitters/dpg_emitter.py b/qdax/core/emitters/dpg_emitter.py index ea921237..a30e87af 100644 --- a/qdax/core/emitters/dpg_emitter.py +++ b/qdax/core/emitters/dpg_emitter.py @@ -54,7 +54,7 @@ class DiversityPGEmitter(QualityPGEmitter): """ A diversity policy gradient emitter used to implement QDPG algorithm. - Please not that the inheritence between DiversityPGEmitter and QualityPGEmitter + Please note that the inheritance between DiversityPGEmitter and QualityPGEmitter could be increased with changes in the way transitions samples are handled in the QualityPGEmitter. But this would modify the computation/memory strategy of the current implementation. Hence, we won't apply this yet and will discuss this with diff --git a/qdax/core/emitters/emitter.py b/qdax/core/emitters/emitter.py index 21139356..d2a477a8 100644 --- a/qdax/core/emitters/emitter.py +++ b/qdax/core/emitters/emitter.py @@ -12,7 +12,7 @@ class EmitterState(PyTreeNode): """The state of an emitter. Emitters are used to suggest offspring when evolving a population of genotypes. To emit new genotypes, some - emitters need to have a state, that carries useful informations, like + emitters need to have a state, that carries useful information, like running means, distribution parameters, critics, replay buffers etc... The object emitter state is used to store them and is updated along @@ -89,7 +89,7 @@ def state_update( """This function gives an opportunity to update the emitter state after the genotypes have been scored.
- As a matter of fact, many emitter states needs informations from + As a matter of fact, many emitter states needs information from the evaluations of the genotypes in order to be updated, for instance: - CMA emitter: to update the rank of the covariance matrix - PGA emitter: to fill the replay buffer and update the critic/greedy diff --git a/qdax/core/emitters/mees_emitter.py b/qdax/core/emitters/mees_emitter.py index 4d51326a..821b24da 100644 --- a/qdax/core/emitters/mees_emitter.py +++ b/qdax/core/emitters/mees_emitter.py @@ -449,7 +449,7 @@ def _es_emitter( scores_fn: Callable[[Fitness, Descriptor], jnp.ndarray], ) -> Tuple[Genotype, optax.OptState, RNGKey]: """Main es component, given a parent and a way to infer the score from - the fitnesses and descriptors fo its es-samples, return its + the fitnesses and descriptors of its es-samples, return its approximated-gradient-generated offspring. Args: @@ -677,7 +677,7 @@ def state_update( assert jax.tree_util.tree_leaves(genotypes)[0].shape[0] == 1, ( "ERROR: MAP-Elites-ES generates 1 offspring per generation, " - + "batch_size should be 1, the inputed batch has size:" + + "batch_size should be 1, the inputted batch has size:" + str(jax.tree_util.tree_leaves(genotypes)[0].shape[0]) ) diff --git a/qdax/core/emitters/omg_mega_emitter.py b/qdax/core/emitters/omg_mega_emitter.py index 580bd151..c7aac8c9 100644 --- a/qdax/core/emitters/omg_mega_emitter.py +++ b/qdax/core/emitters/omg_mega_emitter.py @@ -22,7 +22,7 @@ class OMGMEGAEmitterState(EmitterState): Args: gradients_repertoire: MapElites repertoire containing the gradients - of the indivuals. + of the individuals. """ gradients_repertoire: MapElitesRepertoire @@ -46,11 +46,11 @@ class OMGMEGAEmitter(Emitter): sampling. - in the state_update, we have to insert the gradients in the gradients repertoire in the same way the individuals were inserted. Once again, this is - slightly unoptimal because the same addition mecanism has to be computed two + slightly unoptimal because the same addition mechanism has to be computed two times. One solution that we are discussing and that is very similar to the first - solution discussed above, would be to decompose the addition mecanism in two - phases: one outputing the indices at which individuals will be added, and then - the actual insertion step. This would enable to re-use the same indices to add + solution discussed above, would be to decompose the addition mechanism in two + phases: one outputting the indices at which individuals will be added, and then + the actual insertion step. This would enable to reuse the same indices to add the gradients instead of having to recompute them. The two design choices seem acceptable and enable to have OMG MEGA compatible diff --git a/qdax/core/emitters/qdpg_emitter.py b/qdax/core/emitters/qdpg_emitter.py index b9de6090..3616a4b9 100644 --- a/qdax/core/emitters/qdpg_emitter.py +++ b/qdax/core/emitters/qdpg_emitter.py @@ -1,7 +1,7 @@ """Implementation of an updated version of the algorithm QDPG presented in the paper https://arxiv.org/abs/2006.08505. -QDPG has been udpated to enter in the container+emitter framework of QD. Furthermore, +QDPG has been updated to enter in the container+emitter framework of QD. Furthermore, it has been updated to work better with Jax in term of time cost. Those changes have been made in accordance with the authors of this algorithm. 
""" diff --git a/qdax/core/map_elites.py b/qdax/core/map_elites.py index d0b075a9..c3155dd3 100644 --- a/qdax/core/map_elites.py +++ b/qdax/core/map_elites.py @@ -24,7 +24,7 @@ class MAPElites: """Core elements of the MAP-Elites algorithm. Note: Although very similar to the GeneticAlgorithm, we decided to keep the - MAPElites class independant of the GeneticAlgorithm class at the moment to keep + MAPElites class independent of the GeneticAlgorithm class at the moment to keep elements explicit. Args: @@ -65,7 +65,7 @@ def init( Args: genotypes: initial genotypes, pytree in which leaves have shape (batch_size, num_features) - centroids: tesselation centroids of shape (batch_size, num_descriptors) + centroids: tessellation centroids of shape (batch_size, num_descriptors) random_key: a random key used for stochastic operations. Returns: diff --git a/qdax/environments/__init__.py b/qdax/environments/__init__.py index 054c75f7..f0b7e9d1 100644 --- a/qdax/environments/__init__.py +++ b/qdax/environments/__init__.py @@ -25,7 +25,7 @@ from qdax.environments.pointmaze import PointMaze from qdax.environments.wrappers import CompletedEvalWrapper -# experimentally determinated offset (except for antmaze) +# experimentally determined offset (except for antmaze) # should be sufficient to have only positive rewards but no guarantee reward_offset = { "pointmaze": 2.3431, diff --git a/qdax/environments/bd_extractors.py b/qdax/environments/bd_extractors.py index 918fbbfb..8649b74c 100644 --- a/qdax/environments/bd_extractors.py +++ b/qdax/environments/bd_extractors.py @@ -9,7 +9,7 @@ def get_final_xy_position(data: QDTransition, mask: jnp.ndarray) -> Descriptor: - """Compute final xy positon. + """Compute final xy position. This function suppose that state descriptor is the xy position, as it just select the final one of the state descriptors given. diff --git a/qdax/environments/pointmaze.py b/qdax/environments/pointmaze.py index 78f7c575..b299864f 100644 --- a/qdax/environments/pointmaze.py +++ b/qdax/environments/pointmaze.py @@ -110,7 +110,7 @@ def reset(self, rng: jp.ndarray) -> State: x_init = jp.random_uniform(rng1, (), low=self._x_min, high=self._x_max) / 10 y_init = jp.random_uniform(rng2, (), low=self._y_min, high=-0.7) obs_init = jp.array([x_init, y_init]) - # create fake qp (to re-use brax.State) + # create fake qp (to reuse brax.State) fake_qp = brax.QP.zero() # init reward, metrics and infos reward, done = jp.zeros(2) diff --git a/qdax/tasks/README.md b/qdax/tasks/README.md index bb3a09d8..d35c9125 100644 --- a/qdax/tasks/README.md +++ b/qdax/tasks/README.md @@ -1,7 +1,7 @@ # QD Tasks The `tasks` directory provides default `scoring_function`'s to import easily to perform experiments without the boilerplate code so that the main script is kept simple and is not bloated. It provides a set of fixed tasks that is not meant to be modified. If you are developing and require the flexibility of modifying the task and the details that come along with it, we recommend copying and writing your own custom `scoring_function` in your main script instead of importing from `tasks`. -The `tasks` directory also serves as a way to maintain a QD benchmark task suite that can be easily accesed. We implement several benchmark task across a range of domains. The tasks here are classical tasks from QD literature as well as more recent benchmarks tasks proposed at the [QD Benchmarks Workshop at GECCO 2022](https://quality-diversity.github.io/workshop). 
+The `tasks` directory also serves as a way to maintain a QD benchmark task suite that can be easily accessed. We implement several benchmark tasks across a range of domains. The tasks here are classical tasks from QD literature as well as more recent benchmark tasks proposed at the [QD Benchmarks Workshop at GECCO 2022](https://quality-diversity.github.io/workshop). ## Arm | Task | Parameter Dimensions | Parameter Bounds | Descriptor Dimensions | Descriptor Bounds | Description | @@ -89,8 +89,8 @@ desc_size = 2 | Square | n | $[0,1]^n$ | n | $[0,1]^n$ | | | Checkered | n | $[0,1]^n$ | n | $[0,1]^n$ | | | Empty Circle | n | $[0,1]^n$ | n | $[0,1]^n$ | | -| Non-continous Islands | n | $[0,1]^n$ | n | $[0,1]^n$ | | -| Continous Islands | n | $[0,1]^n$ | n | $[0,1]^n$ | | +| Non-continuous Islands | n | $[0,1]^n$ | n | $[0,1]^n$ | | +| Continuous Islands | n | $[0,1]^n$ | n | $[0,1]^n$ | | ### Example Usage diff --git a/qdax/tasks/brax_envs.py b/qdax/tasks/brax_envs.py index 07d37d59..ea928fba 100644 --- a/qdax/tasks/brax_envs.py +++ b/qdax/tasks/brax_envs.py @@ -116,7 +116,7 @@ def scoring_function_brax_envs( This rollout is only deterministic when all the init states are the same. If the init states are fixed but different, as a policy is not necessarily - evaluated with the same environment everytime, this won't be determinist. + evaluated with the same environment every time, this won't be deterministic. When the init states are different, this is not purely stochastic. Args: @@ -186,7 +186,7 @@ def scoring_actor_dc_function_brax_envs( This rollout is only deterministic when all the init states are the same. If the init states are fixed but different, as a policy is not necessarily - evaluated with the same environment everytime, this won't be determinist. + evaluated with the same environment every time, this won't be deterministic. When the init states are different, this is not purely stochastic. Args: @@ -514,8 +514,8 @@ def get_aurora_scoring_fn( """Evaluates policies contained in flatten_variables in parallel This rollout is only deterministic when all the init states are the same. - If the init states are fixed but different, as a policy is not necessarly - evaluated with the same environment everytime, this won't be determinist. + If the init states are fixed but different, as a policy is not necessarily + evaluated with the same environment every time, this won't be deterministic. When the init states are different, this is not purely stochastic.
This choice was made for performance reason, as the reset function of brax envs diff --git a/qdax/tasks/hypervolume_functions.py b/qdax/tasks/hypervolume_functions.py index 340581ab..bd9ac933 100644 --- a/qdax/tasks/hypervolume_functions.py +++ b/qdax/tasks/hypervolume_functions.py @@ -13,7 +13,7 @@ def square(params: Genotype) -> Tuple[Fitness, Descriptor]: """ - Seach space should be [0,1]^n + Search space should be [0,1]^n BD space should be [0,1]^n """ freq = 5 @@ -24,7 +24,7 @@ def checkered(params: Genotype) -> Tuple[Fitness, Descriptor]: """ - Seach space should be [0,1]^n + Search space should be [0,1]^n BD space should be [0,1]^n """ freq = 5 @@ -35,7 +35,7 @@ def empty_circle(params: Genotype) -> Tuple[Fitness, Descriptor]: """ - Seach space should be [0,1]^n + Search space should be [0,1]^n BD space should be [0,1]^n """ @@ -52,7 +52,7 @@ def _gaussian(x: jnp.ndarray, mu: float, sig: float) -> jnp.ndarray: def non_continous_islands(params: Genotype) -> Tuple[Fitness, Descriptor]: """ - Seach space should be [0,1]^n + Search space should be [0,1]^n BD space should be [0,1]^n """ f = jnp.prod(params) @@ -62,7 +62,7 @@ def continous_islands(params: Genotype) -> Tuple[Fitness, Descriptor]: """ - Seach space should be [0,1]^n + Search space should be [0,1]^n BD space should be [0,1]^n """ coeff = 20 diff --git a/qdax/tasks/jumanji_envs.py b/qdax/tasks/jumanji_envs.py index 5f861f0e..68f2409c 100644 --- a/qdax/tasks/jumanji_envs.py +++ b/qdax/tasks/jumanji_envs.py @@ -153,8 +153,8 @@ def jumanji_scoring_function( deterministic or pseudo-deterministic environments. This rollout is only deterministic when all the init states are the same. - If the init states are fixed but different, as a policy is not necessarly - evaluated with the same environment everytime, this won't be determinist. + If the init states are fixed but different, as a policy is not necessarily + evaluated with the same environment every time, this won't be deterministic. When the init states are different, this is not purely stochastic. """ diff --git a/qdax/utils/plotting.py b/qdax/utils/plotting.py index 7f0f086d..8320ba89 100644 --- a/qdax/utils/plotting.py +++ b/qdax/utils/plotting.py @@ -102,7 +102,7 @@ def plot_2d_map_elites_repertoire( Args: centroids: the centroids of the repertoire repertoire_fitnesses: the fitness of the repertoire - minval: minimum values for the descritors + minval: minimum values for the descriptors maxval: maximum values for the descriptors repertoire_descriptors: the descriptors. Defaults to None. ax: a matplotlib axe for the figure to plot. Defaults to None. @@ -229,7 +229,7 @@ def plot_map_elites_results( env_steps: the array containing the number of steps done in the environment. metrics: a dictionary containing metrics from the optimizatoin process. repertoire: the final repertoire obtained. - min_bd: the mimimal possible values for the bd. + min_bd: the minimal possible values for the bd. max_bd: the maximal possible values for the bd.
Returns: diff --git a/qdax/utils/sampling.py b/qdax/utils/sampling.py index 94d4e160..fb0d37d3 100644 --- a/qdax/utils/sampling.py +++ b/qdax/utils/sampling.py @@ -263,7 +263,7 @@ def sampling_reproducibility( descriptor_reproducibility_extractor: Callable[[jnp.ndarray], jnp.ndarray] = std, ) -> Tuple[Fitness, Descriptor, ExtraScores, Fitness, Descriptor, RNGKey]: """Wrap scoring_function to perform sampling and compute the - expectation and reproduciblity. + expectation and reproducibility. This function return the reproducibility of fitnesses and descriptors for each individual over `num_samples` evaluations using the provided extractor diff --git a/tests/baselines_test/dcrlme_test.py b/tests/baselines_test/dcrlme_test.py index 1bc9688d..942abd67 100644 --- a/tests/baselines_test/dcrlme_test.py +++ b/tests/baselines_test/dcrlme_test.py @@ -104,7 +104,7 @@ def test_dcrlme() -> None: fake_batch_obs = jnp.zeros(shape=(batch_size, env.observation_size)) init_params = jax.vmap(policy_network.init)(keys, fake_batch_obs) - # Define the fonction to play a step with the policy in the environment + # Define the function to play a step with the policy in the environment def play_step_fn( env_state: EnvState, policy_params: Params, random_key: RNGKey ) -> Tuple[EnvState, Params, RNGKey, DCRLTransition]: diff --git a/tests/baselines_test/mees_test.py b/tests/baselines_test/mees_test.py index b5d56f1f..2eb280f1 100644 --- a/tests/baselines_test/mees_test.py +++ b/tests/baselines_test/mees_test.py @@ -63,7 +63,7 @@ def test_mees() -> None: fake_batch = jnp.zeros(shape=(1, env.observation_size)) init_variables = jax.vmap(policy_network.init)(keys, fake_batch) - # Define the fonction to play a step with the policy in the environment + # Define the function to play a step with the policy in the environment def play_step_fn( env_state: EnvState, policy_params: Params, diff --git a/tests/baselines_test/pgame_test.py b/tests/baselines_test/pgame_test.py index 639f1a9d..a9fd336e 100644 --- a/tests/baselines_test/pgame_test.py +++ b/tests/baselines_test/pgame_test.py @@ -70,7 +70,7 @@ def test_pgame() -> None: fake_batch = jnp.zeros(shape=(env_batch_size, env.observation_size)) init_variables = jax.vmap(policy_network.init)(keys, fake_batch) - # Define the fonction to play a step with the policy in the environment + # Define the function to play a step with the policy in the environment def play_step_fn( env_state: EnvState, policy_params: Params, diff --git a/tests/baselines_test/qdpg_test.py b/tests/baselines_test/qdpg_test.py index 7f1868f6..dfed7bb8 100644 --- a/tests/baselines_test/qdpg_test.py +++ b/tests/baselines_test/qdpg_test.py @@ -85,7 +85,7 @@ def test_qdpg() -> None: fake_batch = jnp.zeros(shape=(env_batch_size, env.observation_size)) init_variables = jax.vmap(policy_network.init)(keys, fake_batch) - # Define the fonction to play a step with the policy in the environment + # Define the function to play a step with the policy in the environment def play_step_fn( env_state: EnvState, policy_params: Params, diff --git a/tests/core_test/cmaes_test.py b/tests/core_test/cmaes_test.py index dc6078d1..c81ee19e 100644 --- a/tests/core_test/cmaes_test.py +++ b/tests/core_test/cmaes_test.py @@ -41,7 +41,7 @@ def sphere_scoring(x: jnp.ndarray) -> jnp.ndarray: # sample samples, random_key = cmaes.sample(state, random_key) - # udpate + # update state = cmaes.update(state, samples) # check stop condition diff --git a/tests/core_test/map_elites_test.py b/tests/core_test/map_elites_test.py index 
61c90f06..0702b61e 100644 --- a/tests/core_test/map_elites_test.py +++ b/tests/core_test/map_elites_test.py @@ -73,7 +73,7 @@ def test_map_elites(env_name: str, batch_size: int) -> None: reset_fn = jax.jit(jax.vmap(env.reset)) init_states = reset_fn(keys) - # Define the fonction to play a step with the policy in the environment + # Define the function to play a step with the policy in the environment def play_step_fn( env_state: EnvState, policy_params: Params, diff --git a/tests/core_test/neuroevolution_test/buffers_test/trajectory_buffer_test.py b/tests/core_test/neuroevolution_test/buffers_test/trajectory_buffer_test.py index 12ea0874..97a91b0d 100644 --- a/tests/core_test/neuroevolution_test/buffers_test/trajectory_buffer_test.py +++ b/tests/core_test/neuroevolution_test/buffers_test/trajectory_buffer_test.py @@ -202,8 +202,8 @@ def test_trajectory_buffer_insert() -> None: multy_step_episodic_data, equal_nan=True, ), - "Episodic data when transitions are added sequentially is not consistent to \ - when they are added as batch.", + "Episodic data when transitions are added sequentially is not consistent to " + "when they are added as batch.", ) pytest.assume( diff --git a/tests/utils_test/sampling_test.py b/tests/utils_test/sampling_test.py index 981a546d..5c3c880f 100644 --- a/tests/utils_test/sampling_test.py +++ b/tests/utils_test/sampling_test.py @@ -50,7 +50,7 @@ def test_sampling() -> None: fake_batch = jnp.zeros(shape=(1, env.observation_size)) init_variables = jax.vmap(policy_network.init)(keys, fake_batch) - # Define the fonction to play a step with the policy in the environment + # Define the function to play a step with the policy in the environment def play_step_fn( env_state: EnvState, policy_params: Params,