diff --git a/docs/source/_static/suzuki_reizman.gif b/docs/source/_static/suzuki_reizman.gif new file mode 100644 index 00000000..0791ce7f Binary files /dev/null and b/docs/source/_static/suzuki_reizman.gif differ diff --git a/docs/source/conf.py b/docs/source/conf.py index 99e989f6..74c7fc0c 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -51,11 +51,8 @@ "sphinx.ext.intersphinx", # read the docs theme "sphinx_rtd_theme", - # show plots - "matplotlib.sphinxext.mathmpl", - "matplotlib.sphinxext.plot_directive", - # Doctest - "sphinx.ext.doctest", + # Redirects + "sphinx_reredirects", ] # Add any paths that contain templates here, relative to this directory. @@ -116,6 +113,13 @@ def linkcode_resolve(domain, info): return "https://somesite/sourcerepo/%s.py" % filename +# -- Options for redirects---------------------------------------------------- + +redirects = { + "tutorial": "tutorials/intro.html", + "experiments_benchmarks/new_benchmarks": "../tutorials/new_benchmarks.html", +} + # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for diff --git a/docs/source/experiments_benchmarks/experimental_emulator.rst b/docs/source/experiments_benchmarks/experimental_emulator.rst index b91d600f..94d1b8e0 100644 --- a/docs/source/experiments_benchmarks/experimental_emulator.rst +++ b/docs/source/experiments_benchmarks/experimental_emulator.rst @@ -6,4 +6,7 @@ Experimental Emulator API .. autoclass:: summit.benchmarks.ANNRegressor + :members: + +.. 
autoclass:: summit.benchmarks.RegressorRegistry :members: \ No newline at end of file diff --git a/docs/source/experiments_benchmarks/index.rst b/docs/source/experiments_benchmarks/index.rst index bac3f240..b0d8f7e5 100644 --- a/docs/source/experiments_benchmarks/index.rst +++ b/docs/source/experiments_benchmarks/index.rst @@ -1,15 +1,14 @@ Experiments / Benchmarks ======================== -The :class:`~summit.experiment.Experiment` class provides a generic way of representing chemical reactions, virtual or real. We leverage this class to create the benchmarks available here. To get some insight into how to hook Summit up to real experimetns, look at the tutorial_. +The :class:`~summit.experiment.Experiment` class provides a generic way of representing chemical reactions, virtual or real. We leverage this class to create the benchmarks available in Summit. You can also create your own benchmarks based on experimental data (see here_). To get some insight into how to hook Summit up to real experiments, look at the introductory tutorial_. Here, we present the already implemented benchmarks and show you how to create new onces. -.. _tutorial: ../tutorial.ipynb -.. _new_benchmarks: new_benchmarks.rst +.. _here: ../tutorials/new_benchmarks.ipynb +.. _tutorial: ../tutorials/intro.ipynb .. toctree:: implemented_benchmarks - new_benchmarks experiment experimental_emulator \ No newline at end of file diff --git a/docs/source/experiments_benchmarks/new_benchmarks.ipynb b/docs/source/experiments_benchmarks/new_benchmarks.ipynb deleted file mode 100644 index 4c47a1f2..00000000 --- a/docs/source/experiments_benchmarks/new_benchmarks.ipynb +++ /dev/null @@ -1,329 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "source": [ - "# Creating New Benchmarks\n", - "\nHere we give a demonstration of how to train a new benchmark based on experimental data. We call these type of benchmarks `ExperimentalEmulator`. 
As an example, we are going to create a benchmark for the Suzuki-Miyaura Cross-Coupling reaction in [Reizman et al. (2016)](https://doi.org/10.1039/C6RE00153J). " - ], - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "## Google Colab\n", - "\nIf you would like to follow along with this tutorial, you can open it in Google Colab using the button below." - ], - "metadata": {} - }, - { - "cell_type": "raw", - "source": [ - "|colab_badge|" - ], - "metadata": { - "raw_mimetype": "text/restructuredtext" - } - }, - { - "cell_type": "markdown", - "source": [ - "You will need to run the following cell to make sure Summit and all its dependencies are installed. If prompted, restart the runtime." - ], - "metadata": {} - }, - { - "cell_type": "code", - "source": [ - "!pip install summit" - ], - "outputs": [], - "execution_count": null, - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "## Create the domain\n", - "\nLet's first import the needed parts of Summit." - ], - "metadata": {} - }, - { - "cell_type": "code", - "source": [ - "from summit.benchmarks import ExperimentalEmulator\n", - "from summit.domain import *\n", - "from summit.utils.dataset import DataSet\n", - "import pkg_resources\n", - "import pathlib\n", - "import pprint" - ], - "outputs": [], - "execution_count": 1, - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "We first need to create a domain. A domain contains all the decision variables, constraints and objectives for a benchmark." - ], - "metadata": {} - }, - { - "cell_type": "code", - "source": [ - "domain = Domain()" - ], - "outputs": [], - "execution_count": 2, - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "Above, we instantiate a new domain without any variables. Here, we are going to manipulate the catalyst, base, catalyst loading, base stoichiometry and temperature. Our objectives are to maximise yield and minimise turn over number (TON). 
We can use the increment operator `+=` to add variables to the domain." - ], - "metadata": {} - }, - { - "cell_type": "code", - "source": [ - "# Decision variables\n", - "des_1 = \"Catalyst type - different ligands\"\n", - "domain += CategoricalVariable(\n", - " name=\"catalyst\",\n", - " description=des_1,\n", - " levels=[\n", - " \"P1-L1\",\n", - " \"P2-L1\",\n", - " \"P1-L2\",\n", - " \"P1-L3\",\n", - " \"P1-L4\",\n", - " \"P1-L5\",\n", - " \"P1-L6\",\n", - " \"P1-L7\",\n", - " ],\n", - ")\n", - "\n", - "des_2 = \"Residence time in seconds (s)\"\n", - "domain += ContinuousVariable(name=\"t_res\", description=des_2, bounds=[60, 600])\n", - "\n", - "des_3 = \"Reactor temperature in degrees Celsius (ºC)\"\n", - "domain += ContinuousVariable(\n", - " name=\"temperature\", description=des_3, bounds=[30, 110]\n", - ")\n", - "\n", - "des_4 = \"Catalyst loading in mol%\"\n", - "domain += ContinuousVariable(\n", - " name=\"catalyst_loading\", description=des_4, bounds=[0.5, 2.5]\n", - ")\n", - "\n", - "# Objectives\n", - "des_5 = (\n", - " \"Turnover number - moles product generated divided by moles catalyst used\"\n", - ")\n", - "domain += ContinuousVariable(\n", - " name=\"ton\",\n", - " description=des_5,\n", - " bounds=[0, 200], # TODO: not sure about bounds, maybe redefine\n", - " is_objective=True,\n", - " maximize=True,\n", - ")\n", - "\n", - "des_6 = \"Yield\"\n", - "domain += ContinuousVariable(\n", - " name=\"yield\",\n", - " description=des_6,\n", - " bounds=[0, 100],\n", - " is_objective=True,\n", - " maximize=True,\n", - ")\n", - "\ndomain" - ], - "outputs": [ - { - "output_type": "execute_result", - "execution_count": 3, - "data": { - "text/html": [ - "
NameTypeDescriptionValues
catalystcategorical, inputCatalyst type - different ligands8 levels
t_rescontinuous, inputResidence time in seconds (s)[60,600]
temperaturecontinuous, inputReactor temperature in degrees Celsius (ºC)[30,110]
catalyst_loadingcontinuous, inputCatalyst loading in mol%[0.5,2.5]
toncontinuous, maximize objectiveTurnover number - moles product generated divided by moles catalyst used[0,200]
yieldcontinuous, maximize objectiveYield[0,100]
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {} - } - ], - "execution_count": 3, - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "## Create the Experimental Emulator\n", - "\nNow we just need two lines of code to train the experimental emulator! We first instantiate `ExperimentalEmulator` passing in the domain and a name for the model. Next we train it with two-fold cross-validation and a test set size of 25%. Make sure to replace the `csv_dataset` keyword argument with the path to your csv file. When you run this code, you will see the outputs from the training loop." - ], - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "Here, we import the data that we already have in the Summit package, but you could use your own data. Change verbose to 1 if you want streaming updates of the training." - ], - "metadata": {} - }, - { - "cell_type": "code", - "source": [ - "DATA_PATH = pathlib.Path(pkg_resources.resource_filename(\"summit\", \"benchmarks/data\"))\n", - "ds = DataSet.read_csv(DATA_PATH / \"reizman_suzuki_case_1.csv\",)\n", - "emul = ExperimentalEmulator(model_name='my_reizman', domain=domain, dataset=ds)\n", - "res = emul.train(max_epochs=100, cv_fold=2, test_size=0.25, verbose=0)" - ], - "outputs": [], - "execution_count": 4, - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "Now that the interal model is trained, we can use the experimental emulator. I print out the domain again to remind us of the variables" - ], - "metadata": {} - }, - { - "cell_type": "code", - "source": [ - "domain" - ], - "outputs": [ - { - "output_type": "execute_result", - "execution_count": 5, - "data": { - "text/html": [ - "
NameTypeDescriptionValues
catalystcategorical, inputCatalyst type - different ligands8 levels
t_rescontinuous, inputResidence time in seconds (s)[60,600]
temperaturecontinuous, inputReactor temperature in degrees Celsius (ºC)[30,110]
catalyst_loadingcontinuous, inputCatalyst loading in mol%[0.5,2.5]
toncontinuous, maximize objectiveTurnover number - moles product generated divided by moles catalyst used[0,200]
yieldcontinuous, maximize objectiveYield[0,100]
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {} - } - ], - "execution_count": 5, - "metadata": {} - }, - { - "cell_type": "code", - "source": [ - "conditions = [[\"P1-L1\", 60, 100, 1.0]]\n", - "conditions = DataSet(conditions, columns=[v.name for v in domain.input_variables])\n", - "emul.run_experiments(conditions)" - ], - "outputs": [ - { - "output_type": "execute_result", - "execution_count": 6, - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
catalystt_restemperaturecatalyst_loadingtonyieldcomputation_texperiment_tstrategy
0P1-L1601001.023.36495433.130020.00.058378NaN
\n", - "
" - ], - "text/plain": [ - "NAME catalyst t_res temperature catalyst_loading ton yield \\\n", - "TYPE DATA DATA DATA DATA DATA DATA \n", - "0 P1-L1 60 100 1.0 23.364954 33.13002 \n", - "\n", - "NAME computation_t experiment_t strategy \n", - "TYPE METADATA METADATA METADATA \n", - "0 0.0 0.058378 NaN " - ] - }, - "metadata": {} - } - ], - "execution_count": 6, - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "Now we have a benchmark that can accept conditions and predict the yield and TON!" - ], - "metadata": {} - } - ], - "metadata": { - "kernelspec": { - "name": "python37364bitsummittfmmv07ppy37venv6fc212842bc44e839a51e6623a646abd", - "language": "python", - "display_name": "Python 3.7.3 64-bit ('summit-TfmmV07p-py3.7': venv)" - }, - "language_info": { - "name": "python", - "version": "3.7.3", - "mimetype": "text/x-python", - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "pygments_lexer": "ipython3", - "nbconvert_exporter": "python", - "file_extension": ".py" - }, - "kernel_info": { - "name": "python37364bitsummittfmmv07ppy37venv6fc212842bc44e839a51e6623a646abd" - }, - "nteract": { - "version": "0.12.3" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} \ No newline at end of file diff --git a/docs/source/experiments_benchmarks/new_benchmarks.rst b/docs/source/experiments_benchmarks/new_benchmarks.rst new file mode 100644 index 00000000..3b32335c --- /dev/null +++ b/docs/source/experiments_benchmarks/new_benchmarks.rst @@ -0,0 +1,2 @@ +.. + This a placeholder file to redirect stale links to the new tutorials directory. \ No newline at end of file diff --git a/docs/source/index.rst b/docs/source/index.rst index c63b7721..a33b3518 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -17,11 +17,36 @@ Summit has two key features: * **Strategies**: Optimisation algorithms designed to find the best conditions with the least number of iterations. Summit has eight strategies implemented. 
* **Benchmarks**: Simulations of chemical reactions that can be used to test strategies. We have both mechanistic and data-driven benchmarks. -To get started, follow our tutorial_. You can find a more detailed treatment of Summit in our preprint_. +We suggest trying one of our tutorials_ or reading our publication_ (or preprint_). Also, give us a ⭐ on Github_! -Also, give us a ⭐ on Github_! +Below is a quick start that demonstrates the functionality of Summit: -.. _tutorial : tutorial.ipynb +.. code-block:: python + + # Import summit + from summit.benchmarks import SnarBenchmark + from summit.strategies import NelderMead, MultitoSingleObjective + from summit.run import Runner + + # Instantiate the benchmark + exp = SnarBenchmark() + + # Since the Snar benchmark has two objectives and Nelder-Mead is single objective, we need a multi-to-single objective transform + transform = MultitoSingleObjective( + exp.domain, expression="-sty/1e4+e_factor/100", maximize=False + ) + + # Set up the strategy, passing in the optimisation domain and transform + nm = NelderMead(exp.domain, transform=transform) + + # Use the runner to run closed loop experiments + r = Runner( + strategy=nm, experiment=exp,max_iterations=50 + ) + r.run() + +.. _tutorials : tutorials/index.rst +.. _publication : https://chemistry-europe.onlinelibrary.wiley.com/doi/full/10.1002/cmtd.202000051 .. _preprint : https://chemrxiv.org/articles/preprint/Summit_Benchmarking_Machine_Learning_Methods_for_Reaction_Optimisation/12939806 .. _Github : https://github.com/sustainable-processes/summit @@ -30,7 +55,7 @@ Also, give us a ⭐ on Github_! :caption: Contents: installation - tutorial + tutorials/index domains experiments_benchmarks/index strategies diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst new file mode 100644 index 00000000..3b32335c --- /dev/null +++ b/docs/source/tutorial.rst @@ -0,0 +1,2 @@ +.. + This is a placeholder file to redirect stale links to the new tutorials directory. 
\ No newline at end of file diff --git a/docs/source/tutorials/index.rst b/docs/source/tutorials/index.rst new file mode 100644 index 00000000..98591c25 --- /dev/null +++ b/docs/source/tutorials/index.rst @@ -0,0 +1,10 @@ +Tutorials +========= + +The tutorials should help you get familiarized with Summit. + +.. toctree:: + :maxdepth: 1 + + intro + new_benchmarks \ No newline at end of file diff --git a/docs/source/tutorial.ipynb b/docs/source/tutorials/intro.ipynb similarity index 99% rename from docs/source/tutorial.ipynb rename to docs/source/tutorials/intro.ipynb index 5943d32a..389c71f8 100644 --- a/docs/source/tutorial.ipynb +++ b/docs/source/tutorials/intro.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Tutorial" + "# Intro to Summit" ] }, { @@ -60,7 +60,7 @@ "## SnAr Benchmark\n", "\n", "\n", - "![Image from Hone et al.](_static/hone_2016_snar_chemistry.png)\n", + "![Image from Hone et al.](../_static/hone_2016_snar_chemistry.png)\n", "\n", "Nucleophilic aromatic substitution reactions are commonly used in the fine chemicals industry. In this case, 2,4 dinitrofluorobenzene (**1**) undergoes nucleophilic attack by pyrrolidine (**2**) to form the desired product **3**. Two side products **4** and **5** can also be formed. 
Overall, we want to maximise the amount of product formed and minimise side product formation and waste.\n", "\n", @@ -534,7 +534,7 @@ "metadata": {}, "outputs": [], "source": [ - "FOLDER = pathlib.Path(\"_static/\") # When using this in the context of docs\n", + "FOLDER = pathlib.Path(\"../_static/\") # When using this in the context of docs\n", "# FOLDER = pathlib.Path(\".\")" ] }, @@ -863,7 +863,7 @@ ], "metadata": { "kernelspec": { - "display_name": "covid19", + "display_name": "Python 3", "language": "python", "name": "python3" }, diff --git a/docs/source/tutorials/new_benchmarks.ipynb b/docs/source/tutorials/new_benchmarks.ipynb new file mode 100644 index 00000000..8816d0c8 --- /dev/null +++ b/docs/source/tutorials/new_benchmarks.ipynb @@ -0,0 +1,566 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Creating New Benchmarks\n", + "\n", + "Here we give a demonstration of how to create a new benchmark based on experimental data. We call this type of benchmark an emulator, and they use the class `ExperimentalEmulator`. \n", + "\n", + "Emulators contain machine learning models, which can learn patterns from experimental data. These models can then be used to predict the outcomes of reactions at conditions that have not been tested in the lab.\n", + "\n", + "Emulators are most applicable when a kinetic model is not available. This is common with reactions where catalysts, bases and acids are still being chosen.\n", + "\n", + "\n", + "As an example, we are going to create a benchmark for the Suzuki-Miyaura Cross-Coupling reaction in [Reizman et al. (2016)](https://doi.org/10.1039/C6RE00153J).\n", + "\n", + "\n", + "![Image from Reizman et al.](../_static/suzuki_reizman.gif) \n", + "\n", + "Scheme reproduced from the paper." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Google Colab\n", + "\n", + "If you would like to follow along with this tutorial, you can open it in Google Colab using the button below." + ] + }, + { + "cell_type": "raw", + "metadata": { + "raw_mimetype": "text/restructuredtext" + }, + "source": [ + "|colab_badge|" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You will need to run the following cell to make sure Summit and all its dependencies are installed. If prompted, restart the runtime." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install summit" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Imports \n", + "\n", + "Let's first import the needed parts of Summit." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from summit.benchmarks import ExperimentalEmulator\n", + "from summit.domain import *\n", + "from summit.utils.dataset import DataSet\n", + "import pkg_resources\n", + "import pathlib" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create the domain\n", + "\n", + "We first need to create a :class:`~summit.domain.Domain`. A domain specifies the aspects of the reaction we will be optimizing. In optimization speak, these are the decision variables (those that are manipulated), constraints and objectives for a benchmark." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "domain = Domain()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Above, we instantiate a new domain without any variables. Here, we are going to manipulate the catalyst, residence time, temperature and catalyst loading. Our objectives are to maximise both yield and turnover number (TON). We can use the increment operator `+=` to add variables to the domain. There are no constraints." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Decision variables\n", + "des_1 = \"Catalyst type - different ligands\"\n", + "domain += CategoricalVariable(\n", + " name=\"catalyst\",\n", + " description=des_1,\n", + " levels=[\n", + " \"P1-L1\",\n", + " \"P2-L1\",\n", + " \"P1-L2\",\n", + " \"P1-L3\",\n", + " \"P1-L4\",\n", + " \"P1-L5\",\n", + " \"P1-L6\",\n", + " \"P1-L7\",\n", + " ],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We specify the catalyst as a `CategoricalVariable`, which can encapsulate discrete decisions such as choosing a catalyst, base, or acid from a list of potential options. We pass the list of potential options to the `levels` keyword argument. Our data should only include one of the catalysts in `levels`.\n", + "\n", + "Below, we use `ContinuousVariable` to specify the rest of the decision variables. Each has `bounds`, which represent the minimum and maximum values of each variable." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "des_2 = \"Residence time in seconds (s)\"\n", + "domain += ContinuousVariable(name=\"t_res\", description=des_2, bounds=[60, 600])\n", + "\n", + "des_3 = \"Reactor temperature in degrees Celsius (ºC)\"\n", + "domain += ContinuousVariable(\n", + " name=\"temperature\", description=des_3, bounds=[30, 110]\n", + ")\n", + "\n", + "des_4 = \"Catalyst loading in mol%\"\n", + "domain += ContinuousVariable(\n", + " name=\"catalyst_loading\", description=des_4, bounds=[0.5, 2.5]\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, we specify the objectives. We use `ContinuousVariable` again, but set `is_objective` to `True` and specify whether to maximize (or minimize) each objective." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Objectives\n", + "des_5 = \"Yield\"\n", + "domain += ContinuousVariable(\n", + " name=\"yield\",\n", + " description=des_5,\n", + " bounds=[0, 100],\n", + " is_objective=True,\n", + " maximize=True,\n", + ")\n", + "\n", + "\n", + "des_6 = (\n", + " \"Turnover number - moles product generated divided by moles catalyst used\"\n", + ")\n", + "domain += ContinuousVariable(\n", + " name=\"ton\",\n", + " description=des_6,\n", + " bounds=[0, 200],\n", + " is_objective=True,\n", + " maximize=True,\n", + ")\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When working inside a Jupyter Notebook, we can view the domain by putting it at the end of a cell and pressing enter." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
NameTypeDescriptionValues
catalystcategorical, inputCatalyst type - different ligands8 levels
t_rescontinuous, inputResidence time in seconds (s)[60,600]
temperaturecontinuous, inputReactor temperature in degrees Celsius (ºC)[30,110]
catalyst_loadingcontinuous, inputCatalyst loading in mol%[0.5,2.5]
yieldcontinuous, maximize objectiveYield[0,100]
toncontinuous, maximize objectiveTurnover number - moles product generated divided by moles catalyst used[0,200]
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "domain" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load Data\n", + "\n", + "We now load in the data from past experiments, which we will use to train the emulator. Here, we import the data that we already have in the Summit package, but any data available in CSV format would work. " + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# Get the data already available in Summit\n", + "DATA_PATH = pathlib.Path(pkg_resources.resource_filename(\"summit\", \"benchmarks/data\"))\n", + "\n", + "# Read in data into a DataSEt.\n", + "ds = DataSet.read_csv(DATA_PATH / \"reizman_suzuki_case_1.csv\",)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that we are using a :class:`~summit.dataset.Dataset`. In the CSV, it is essential that the columns match the domain and an extra row is added below each column name with the word DATA (see [here](https://github.com/sustainable-processes/summit/blob/master/summit/benchmarks/data/reizman_suzuki_case_1.csv) for an example)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train the Emulator\n", + "\n", + "Now we only need two lines train the experimental emulator! We first instantiate `ExperimentalEmulator` passing in the dataset, domain and a name for the model. Next we train it with two-fold [cross-validation](https://machinelearningmastery.com/k-fold-cross-validation/) and a test set size of 25%.\n", + "\n", + "This step will take some time. Change verbose to 1 if you want streaming updates of the training." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'fit_time': array([9.62368822, 9.36741114, 8.81644607, 8.90442324, 8.75493693]),\n", + " 'score_time': array([0.00828385, 0.00569177, 0.00601172, 0.00571394, 0.00531006]),\n", + " 'val_r2': array([0.73406495, 0.79712705, 0.87009207, 0.8853467 , 0.71850636]),\n", + " 'val_neg_root_mean_squared_error': array([-15.48383141, -12.27886391, -10.41021824, -8.35124302,\n", + " -12.52505112])}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "emul = ExperimentalEmulator(model_name='my_reizman', domain=domain, dataset=ds)\n", + "emul.train(max_epochs=1000, cv_fold=2, test_size=0.1, verbose=0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The training returns a `scores` dictionary from [scikit-learn](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.cross_validate.html#sklearn.model_selection.cross_validate), which contains the results from each cross-validation fold. It might be difficult to understand these scores, so we show some more intuitive evaluation methods next. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Evaluate Emulator" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A [parity plot](https://en.wikipedia.org/wiki/Parity_plot) shows experimental data against model predictions. We can do this for both the train and test sets. The $r^2$ score is shown, which varies between 0 and 1 with 1 being perfect fit." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig, ax = emul.parity_plot(include_test=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also try running virtual experiments using the benchmark to see if they match our expectations." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
catalystt_restemperaturecatalyst_loadingyieldtoncomputation_texperiment_tstrategy
0P1-L1601001.030.52187229.7584840.00.034721NaN
\n", + "
" + ], + "text/plain": [ + "NAME catalyst t_res temperature catalyst_loading yield ton \\\n", + "TYPE DATA DATA DATA DATA DATA DATA \n", + "0 P1-L1 60 100 1.0 30.521872 29.758484 \n", + "\n", + "NAME computation_t experiment_t strategy \n", + "TYPE METADATA METADATA METADATA \n", + "0 0.0 0.034721 NaN " + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "conditions = [[\"P1-L1\", 60, 100, 1.0]]\n", + "conditions = DataSet(conditions, columns=[v.name for v in domain.input_variables])\n", + "emul.run_experiments(conditions)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Saving and Loading Emulators\n", + "\n", + "We can save the trained emulator to disk, so we can reuse it later. The `save` method will do two things: \n", + "\n", + "- Create a JSON file `model_name.json` (where model_name is the name used when ExperimentalEmulator is called). The JSON file contains the domain, any experiments run using the emulator, and some important hyperparameters (e.g., the mean and standard deviation used for normalization).\n", + "- Create several `.pt` files with the weights of the models trained using cross validation.\n", + "\n", + "We simply specify a directory where the files should be saved." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "emul.save(save_dir=\"emulators_tutorial\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can load the saved emulator and use it as expected." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
catalystt_restemperaturecatalyst_loadingyieldtoncomputation_texperiment_tstrategy
0P1-L1601001.030.52187229.7584840.00.027989NaN
\n", + "
" + ], + "text/plain": [ + "NAME catalyst t_res temperature catalyst_loading yield ton \\\n", + "TYPE DATA DATA DATA DATA DATA DATA \n", + "0 P1-L1 60 100 1.0 30.521872 29.758484 \n", + "\n", + "NAME computation_t experiment_t strategy \n", + "TYPE METADATA METADATA METADATA \n", + "0 0.0 0.027989 NaN " + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "emul_new = ExperimentalEmulator.load(model_name=\"my_reizman\", save_dir=\"emulators_tutorial\")\n", + "emul_new.run_experiments(conditions)" + ] + } + ], + "metadata": { + "kernel_info": { + "name": "python37364bitsummittfmmv07ppy37venv6fc212842bc44e839a51e6623a646abd" + }, + "kernelspec": { + "display_name": "Python 3.7.3 64-bit ('summit-TfmmV07p-py3.7': venv)", + "language": "python", + "name": "python37364bitsummittfmmv07ppy37venv6fc212842bc44e839a51e6623a646abd" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + }, + "nteract": { + "version": "0.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/poetry.lock b/poetry.lock index 004bdd4f..fe7efa96 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2,7 +2,7 @@ category = "main" description = "A configurable sidebar-enabled Sphinx theme" name = "alabaster" -optional = true +optional = false python-versions = "*" version = "0.7.12" @@ -36,7 +36,7 @@ version = "1.4.4" [[package]] category = "main" description = "Disable App Nap on macOS >= 10.9" -marker = "python_version >= \"3.4\" and sys_platform == \"darwin\" or platform_system == \"Darwin\" or python_version >= \"3.4\" and platform_system == \"Darwin\"" +marker = "sys_platform == \"darwin\" or platform_system == \"Darwin\" or python_version >= \"3.3\" and sys_platform == \"darwin\" or python_version >= \"3.4\" and sys_platform == 
\"darwin\" or python_version >= \"3.4\" and platform_system == \"Darwin\"" name = "appnope" optional = false python-versions = "*" @@ -113,7 +113,7 @@ numpy = ">=1.12" category = "main" description = "Internationalization utilities" name = "babel" -optional = true +optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" version = "2.9.0" @@ -123,7 +123,6 @@ pytz = ">=2015.7" [[package]] category = "main" description = "Specifications for callback functions passed in to an API" -marker = "python_version >= \"3.4\"" name = "backcall" optional = false python-versions = "*" @@ -216,25 +215,6 @@ optional = true python-versions = "*" version = "1.4" -[[package]] -category = "main" -description = "A simple and extensible library to create Bayesian Neural Network Layers on PyTorch without trouble and with full integration with nn.Module and nn.Sequential." -name = "blitz-bayesian-pytorch" -optional = true -python-versions = "*" -version = "0.2.5" - -[package.dependencies] -numpy = "*" -pillow = ">=7.1" -scikit-learn = ">=0.22.2" -torch = ">=1.4.0" -torchvision = ">=0.5.0" - -[package.source] -reference = "1684957d06b9af8aec5e71b37947c54ff7950a22" -type = "git" -url = "https://github.com/sustainable-processes/blitz-bayesian-deep-learning.git" [[package]] category = "main" description = "The AWS SDK for Python" @@ -353,7 +333,7 @@ version = "4.2.1" category = "main" description = "Python package for providing Mozilla's CA Bundle." name = "certifi" -optional = true +optional = false python-versions = "*" version = "2020.12.5" @@ -372,7 +352,7 @@ pycparser = "*" category = "main" description = "Universal encoding detector for Python 2 and 3" name = "chardet" -optional = true +optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" version = "4.0.0" @@ -395,7 +375,7 @@ version = "2.7.0" [[package]] category = "main" description = "Cross-platform colored terminal text." 
-marker = "python_version >= \"3.4\" and sys_platform == \"win32\" or sys_platform == \"win32\"" +marker = "python_version >= \"3.3\" and sys_platform == \"win32\" or sys_platform == \"win32\" or python_version >= \"3.4\" and sys_platform == \"win32\"" name = "colorama" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" @@ -459,7 +439,7 @@ version = "0.6.0" category = "main" description = "Docutils -- Python Documentation Utilities" name = "docutils" -optional = true +optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" version = "0.16" @@ -661,7 +641,7 @@ dev = ["pytest", "mypy", "ipykernel", "wheel", "selenium", "sphinx", "twine", "g category = "main" description = "Internationalized Domain Names in Applications (IDNA)" name = "idna" -optional = true +optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" version = "2.10" @@ -669,7 +649,7 @@ version = "2.10" category = "main" description = "Getting image size from png/jpeg/jpeg2000/gif file" name = "imagesize" -optional = true +optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" version = "1.2.0" @@ -802,7 +782,6 @@ version = "1.1.0" [[package]] category = "main" description = "An autocompletion tool for Python that can be used for text editors." -marker = "python_version >= \"3.4\"" name = "jedi" optional = false python-versions = ">=3.6" @@ -1340,7 +1319,7 @@ version = "0.8.1" [[package]] category = "main" description = "Pexpect allows easy control of interactive console applications." 
-marker = "python_version >= \"3.4\" and sys_platform != \"win32\"" +marker = "python_version >= \"3.3\" and sys_platform != \"win32\" or sys_platform != \"win32\" or python_version >= \"3.4\" and sys_platform != \"win32\"" name = "pexpect" optional = false python-versions = "*" @@ -1352,7 +1331,6 @@ ptyprocess = ">=0.5" [[package]] category = "main" description = "Tiny 'shelve'-like database with concurrency support" -marker = "python_version >= \"3.4\"" name = "pickleshare" optional = false python-versions = "*" @@ -1396,7 +1374,6 @@ twisted = ["twisted"] [[package]] category = "main" description = "Library for building powerful interactive command lines in Python" -marker = "python_version >= \"3.4\"" name = "prompt-toolkit" optional = false python-versions = ">=3.6.1" @@ -1659,7 +1636,7 @@ version = "2020.11.13" category = "main" description = "Python HTTP for Humans." name = "requests" -optional = true +optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" version = "2.25.1" @@ -1796,7 +1773,7 @@ version = "3.0.5" category = "main" description = "This package provides 29 stemmers for 28 languages generated from Snowball algorithms." 
name = "snowballstemmer" -optional = true +optional = false python-versions = "*" version = "2.1.0" @@ -1813,7 +1790,7 @@ version = "2.2" category = "main" description = "Python documentation generator" name = "sphinx" -optional = true +optional = false python-versions = ">=3.5" version = "3.5.1" @@ -1841,6 +1818,17 @@ docs = ["sphinxcontrib-websupport"] lint = ["flake8 (>=3.5.0)", "isort", "mypy (>=0.800)", "docutils-stubs"] test = ["pytest", "pytest-cov", "html5lib", "cython", "typed-ast"] +[[package]] +category = "main" +description = "Handles redirects for moved pages in Sphinx documentation projects" +name = "sphinx-reredirects" +optional = false +python-versions = "*" +version = "0.0.0" + +[package.dependencies] +sphinx = "*" + [[package]] category = "main" description = "Read the Docs theme for Sphinx" @@ -1859,7 +1847,7 @@ dev = ["transifex-client", "sphinxcontrib-httpdomain", "bump2version"] category = "main" description = "sphinxcontrib-applehelp is a sphinx extension which outputs Apple help books" name = "sphinxcontrib-applehelp" -optional = true +optional = false python-versions = ">=3.5" version = "1.0.2" @@ -1871,7 +1859,7 @@ test = ["pytest"] category = "main" description = "sphinxcontrib-devhelp is a sphinx extension which outputs Devhelp document." 
name = "sphinxcontrib-devhelp" -optional = true +optional = false python-versions = ">=3.5" version = "1.0.2" @@ -1883,7 +1871,7 @@ test = ["pytest"] category = "main" description = "sphinxcontrib-htmlhelp is a sphinx extension which renders HTML help files" name = "sphinxcontrib-htmlhelp" -optional = true +optional = false python-versions = ">=3.5" version = "1.0.3" @@ -1895,7 +1883,7 @@ test = ["pytest", "html5lib"] category = "main" description = "A sphinx extension which renders display math in HTML via JavaScript" name = "sphinxcontrib-jsmath" -optional = true +optional = false python-versions = ">=3.5" version = "1.0.1" @@ -1906,7 +1894,7 @@ test = ["pytest", "flake8", "mypy"] category = "main" description = "sphinxcontrib-qthelp is a sphinx extension which outputs QtHelp document." name = "sphinxcontrib-qthelp" -optional = true +optional = false python-versions = ">=3.5" version = "1.0.3" @@ -1918,7 +1906,7 @@ test = ["pytest"] category = "main" description = "sphinxcontrib-serializinghtml is a sphinx extension which outputs \"serialized\" HTML files (json and pickle)." name = "sphinxcontrib-serializinghtml" -optional = true +optional = false python-versions = ">=3.5" version = "1.1.4" @@ -2066,22 +2054,6 @@ version = "1.7.1" numpy = "*" typing-extensions = "*" -[[package]] -category = "main" -description = "image and video datasets and models for torch deep learning" -name = "torchvision" -optional = true -python-versions = "*" -version = "0.8.2" - -[package.dependencies] -numpy = "*" -pillow = ">=4.1.1" -torch = "1.7.1" - -[package.extras] -scipy = ["scipy"] - [[package]] category = "main" description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." @@ -2147,7 +2119,7 @@ pytz = "*" category = "main" description = "HTTP library with thread-safe connection pooling, file post, and more." 
name = "urllib3" -optional = true +optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4" version = "1.26.3" @@ -2266,13 +2238,13 @@ docs = ["sphinx", "jaraco.packaging (>=3.2)", "rst.linker (>=1.9)"] testing = ["pytest (>=3.5,<3.7.3 || >3.7.3)", "pytest-checkdocs (>=1.2.3)", "pytest-flake8", "pytest-cov", "jaraco.test (>=3.2.0)", "jaraco.itertools", "func-timeout", "pytest-black (>=0.3.7)", "pytest-mypy"] [extras] -bnn = ["blitz-bayesian-pytorch"] +bnn = [] docs = ["sphinx", "nbsphinx", "sphinx-rtd-theme"] entmoot = ["entmoot"] experiments = ["neptune-client", "hiplot", "paramiko", "pyrecorder", "xlrd", "streamlit"] [metadata] -content-hash = "db435ec76eb3ce23c2daf294c3f2b7eb29e01dfb6d41d3a566a46561fd1a4705" +content-hash = "1ef88dcfa2d8bf97004ae18cd4e71ee3a33a57f523a9e66cfd632a5ec0c92a33" python-versions = "^3.7" [metadata.files] @@ -2366,7 +2338,6 @@ bleach = [ blinker = [ {file = "blinker-1.4.tar.gz", hash = "sha256:471aee25f3992bd325afa3772f1063dbdbbca947a041b8b89466dc00d606f8b6"}, ] -blitz-bayesian-pytorch = [] boto3 = [ {file = "boto3-1.17.11-py2.py3-none-any.whl", hash = "sha256:b6131751e3cf2f8d4c027518373b6b82264c3897de65d3519e2d782927e8bf1e"}, {file = "boto3-1.17.11.tar.gz", hash = "sha256:7d44cbd931c653cc68e8ccbf39f3ad8b304cb50d4e964d8c8d0936de33ff8c8b"}, @@ -3499,6 +3470,10 @@ sphinx = [ {file = "Sphinx-3.5.1-py3-none-any.whl", hash = "sha256:e90161222e4d80ce5fc811ace7c6787a226b4f5951545f7f42acf97277bfc35c"}, {file = "Sphinx-3.5.1.tar.gz", hash = "sha256:11d521e787d9372c289472513d807277caafb1684b33eb4f08f7574c405893a9"}, ] +sphinx-reredirects = [ + {file = "sphinx_reredirects-0.0.0-py3-none-any.whl", hash = "sha256:681149d869f782779662dd646b23ca13d3dfe3e98548ad516273052ab2598b7f"}, + {file = "sphinx_reredirects-0.0.0.tar.gz", hash = "sha256:f9db1fc77ff78d4a8a011e4baf94285bb1e31e10447f5bff799e119bbc6ea726"}, +] sphinx-rtd-theme = [ {file = "sphinx_rtd_theme-0.5.1-py2.py3-none-any.whl", hash = 
"sha256:fa6bebd5ab9a73da8e102509a86f3fcc36dec04a0b52ea80e5a033b2aba00113"}, {file = "sphinx_rtd_theme-0.5.1.tar.gz", hash = "sha256:eda689eda0c7301a80cf122dad28b1861e5605cbf455558f3775e1e8200e83a5"}, @@ -3578,16 +3553,6 @@ torch = [ {file = "torch-1.7.1-cp39-cp39-win_amd64.whl", hash = "sha256:6652a767a0572ae0feb74ad128758e507afd3b8396b6e7f147e438ba8d4c6f63"}, {file = "torch-1.7.1-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:38d67f4fb189a92a977b2c0a38e4f6dd413e0bf55aa6d40004696df7e40a71ff"}, ] -torchvision = [ - {file = "torchvision-0.8.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:86fae370d222f76ad57c57c3bee03f78b8db727743bfb4c1559a3d395159cea8"}, - {file = "torchvision-0.8.2-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:951239b5fcb911dbf78c1385d677f5f48c7a1b12859e3d3ec287562821b17cf2"}, - {file = "torchvision-0.8.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:24db8f4c3d812a032273f68563ad5dbd724f5bfbed523d0c6dce8cede26bb153"}, - {file = "torchvision-0.8.2-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:b068f6bcbe91bdd34dda0a39e8a26392add45a3be82543f6dd523b76484fb56f"}, - {file = "torchvision-0.8.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:afb76a66b9b0693f758a881a2bf333ed97e3c0c3f15a413c4f49d8dd8bd21307"}, - {file = "torchvision-0.8.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:cd8817e9197fc60ebae37162a445db90bbf35591314a5767ad3d1490b5d65b0f"}, - {file = "torchvision-0.8.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1bd58acc3366ec02266aae56a7a752d43ef07de4a6ba420c4f907d0c9168bb8c"}, - {file = "torchvision-0.8.2-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:976750a49db2e23dc5a1ed0b5c31f7af51ed2702eee410ee09ef985c3a3e48cf"}, -] tornado = [ {file = "tornado-6.1-cp35-cp35m-macosx_10_9_x86_64.whl", hash = "sha256:d371e811d6b156d82aa5f9a4e08b58debf97c302a35714f6f45e35139c332e32"}, {file = "tornado-6.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:0d321a39c36e5f2c4ff12b4ed58d41390460f798422c4504e09eb5678e09998c"}, diff --git 
a/pyproject.toml b/pyproject.toml index ef12df11..ef0ddb16 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,12 +51,13 @@ nbsphinx = {version="^0.7.1", optional=true} sphinx-rtd-theme = {version="^0.5.0", optional=true} pyrecorder = {version="^0.1.8", optional=true} entmoot = {version="^0.1.4", optional=true} +sphinx-reredirects = {version="^0.0.0", optional=true} [tool.poetry.extras] bnn = ["blitz-bayesian-pytorch"] entmoot = ["entmoot"] experiments = ["neptune-client", "hiplot", "paramiko", "pyrecorder", "xlrd", "streamlit"] -docs = ["sphinx", "nbsphinx", "sphinx-rtd-theme"] +docs = ["sphinx", "nbsphinx", "sphinx-rtd-theme", "sphinx-reredirects"] [tool.poetry.dev-dependencies] diff --git a/scripts/train_emulators/results/baumgartner_aniline_cn_crosscoupling_descriptors.png b/scripts/train_emulators/results/baumgartner_aniline_cn_crosscoupling_descriptors.png new file mode 100644 index 00000000..f9d5a1a8 Binary files /dev/null and b/scripts/train_emulators/results/baumgartner_aniline_cn_crosscoupling_descriptors.png differ diff --git a/summit/benchmarks/experimental_emulator.py b/summit/benchmarks/experimental_emulator.py index 781d5555..8b60e4b2 100644 --- a/summit/benchmarks/experimental_emulator.py +++ b/summit/benchmarks/experimental_emulator.py @@ -117,14 +117,15 @@ class ExperimentalEmulator(Experiment): >>> import matplotlib.pyplot as plt >>> import pathlib >>> import pkg_resources - >>> # Steal domain and ata from Reizman example + >>> # Steal domain and data from Reizman example >>> DATA_PATH = pathlib.Path(pkg_resources.resource_filename("summit", "benchmarks/data")) >>> model_name = f"reizman_suzuki_case_1" >>> domain = ReizmanSuzukiEmulator.setup_domain() >>> ds = DataSet.read_csv(DATA_PATH / f"{model_name}.csv") - >>> # Create emulator and train + >>> # Create emulator and train (bump max_epochs to 1000 to get better training) >>> exp = ExperimentalEmulator(model_name,domain,dataset=ds) - >>> res = exp.train(max_epochs=1000, cv_folds=2, 
random_state=100, test_size=0.2) + >>> res = exp.train(max_epochs=10, cv_folds=2, random_state=100, test_size=0.2) + >>> # Plot to show the quality of the fit >>> fig, ax = exp.parity_plot(include_test=True) >>> plt.show() @@ -202,6 +203,9 @@ def _predict(self, X, **kwargs): def train(self, **kwargs): """Train the model on the dataset + This will automatically do a train-test split and then train via + cross-validation on the train set. + Parameters --------- test_size : float, optional @@ -299,6 +303,18 @@ def train(self, **kwargs): return res def test(self, **kwargs): + """Get test results + + This requires that train has already been called or + the ExperimentalEmulator was initialized from a pretrained model. + + Parameters + ---------- + scoring : str or list, optional + A list of scoring functions or names of them. Defaults to R2 and negative RMSE. + See here for more https://scikit-learn.org/stable/modules/model_evaluation.html#scoring-parameter + + """ scoring = kwargs.get("scoring", ["r2", "neg_root_mean_squared_error"]) scores_list = [] for predictor in self.predictors: @@ -639,7 +655,7 @@ def save(self, save_dir): @classmethod def load(cls, model_name, save_dir, **kwargs): - """Load all the essential parameters of the ExperimentalEmulator to disk + """Load all the essential parameters of the ExperimentalEmulator from disk Parameters ---------- @@ -680,7 +696,8 @@ def parity_plot(self, **kwargs): if type(vars) == str: vars = [vars] - fig, axes = plt.subplots(1, len(vars)) + fig, axes = plt.subplots(1, len(vars), figsize=(10, 5)) + fig.subplots_adjust(wspace=0.5) if len(vars) > 1: fig.subplots_adjust(wspace=0.2) if type(axes) != np.ndarray: @@ -1323,8 +1340,13 @@ def forward(self, x, **kwargs): class RegressorRegistry: """Registry for Regressors - Models registered using the register method - are saved as the class name. + The registry stores regressors that can be used with the + :class:`~summit.benchmarks.ExperimentalEmulator`.
A regressor can be + any `torch.nn.Module` that takes the parameters `input_dim` and `output_dim` for + the input and output dimensions respectively. + + Registering a regressor means that it can be serialized and deserialized + using the save/load functionality of the emulator. """ @@ -1345,6 +1367,14 @@ def __setitem__(self, key, value): self.regressors[key] = value def register(self, regressor): + """Register a new regressor + + Parameters + --------- + regressor: torch.nn.Module + A torch neural network module + + """ key = regressor.__name__ self.regressors[key] = regressor @@ -1376,8 +1406,18 @@ def get_pretrained_reizman_suzuki_emulator(case=1): Examples --------- - >>> exp = get_pretrained_reizman_suzuki_emulator(case=1) - + >>> import matplotlib.pyplot as plt + >>> from summit.benchmarks import get_pretrained_reizman_suzuki_emulator + >>> from summit.utils.dataset import DataSet + >>> import pandas as pd + >>> b = get_pretrained_reizman_suzuki_emulator(case=1) + >>> fig, ax = b.parity_plot(include_test=True) + >>> plt.show() + >>> columns = [v.name for v in b.domain.variables] + >>> values = { "catalyst": ["P1-L3"], "t_res": [600], "temperature": [30],"catalyst_loading": [0.498],} + >>> conditions = pd.DataFrame(values) + >>> conditions = DataSet.from_df(conditions) + >>> results = b.run_experiments(conditions, return_std=True) """ model_name = f"reizman_suzuki_case_{case}" model_path = get_model_path() / model_name @@ -1515,13 +1555,31 @@ def get_pretrained_baumgartner_cc_emulator(include_cost=False, use_descriptors=F a single feature, pass descriptors_features a list where the only item is the name of the desired categorical variable.
+ + Examples + -------- + + >>> import matplotlib.pyplot as plt + >>> from summit.benchmarks import get_pretrained_baumgartner_cc_emulator + >>> from summit.utils.dataset import DataSet + >>> import pandas as pd + >>> b = get_pretrained_baumgartner_cc_emulator(include_cost=True, use_descriptors=False) + >>> fig, ax = b.parity_plot(include_test=True) + >>> plt.show() + >>> columns = [v.name for v in b.domain.variables] + >>> values = { "catalyst": ["tBuXPhos"], "base": ["DBU"], "t_res": [328.717801570892],"temperature": [30],"base_equivalents": [2.18301549894049]} + >>> conditions = pd.DataFrame(values) + >>> conditions = DataSet.from_df(conditions) + >>> results = b.run_experiments(conditions, return_std=True) + """ model_name = "baumgartner_aniline_cn_crosscoupling" + data_path = get_data_path() + ds = DataSet.read_csv(data_path / f"{model_name}.csv") + model_name += "_descriptors" if use_descriptors else "" model_path = get_model_path() / model_name if not model_path.exists(): raise NotADirectoryError("Could not initialize from expected path.") - data_path = get_data_path() - ds = DataSet.read_csv(data_path / f"{model_name}.csv") exp = BaumgartnerCrossCouplingEmulator.load( model_path, dataset=ds, @@ -1672,9 +1730,21 @@ def load(cls, save_dir, include_cost=False, use_descriptors=False, **kwargs): ---------- save_dir : str or pathlib.Path The directory from which to load emulator files. + include_cost : bool, optional + Include minimization of cost as an extra objective. Cost is calculated + as a deterministic function of the inputs (i.e., no model is trained). + Defaults to False. + use_descriptors : bool, optional + Use descriptors for the catalyst and base instead of one-hot encoding (defaults to False). + The descriptors have been pre-calculated using COSMO-RS. To only use descriptors with + a single feature, pass descriptors_features a list where + the only item is the name of the desired categorical variable.
""" - model_name = "baumgartner_aniline_cn_crosscoupling" + if use_descriptors: + model_name = "baumgartner_aniline_cn_crosscoupling_descriptors" + else: + model_name = "baumgartner_aniline_cn_crosscoupling" save_dir = pathlib.Path(save_dir) with open(save_dir / f"{model_name}.json", "r") as f: d = json.load(f) diff --git a/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling_descriptors/baumgartner_aniline_cn_crosscoupling_descriptors.json b/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling_descriptors/baumgartner_aniline_cn_crosscoupling_descriptors.json new file mode 100644 index 00000000..a9e050d3 --- /dev/null +++ b/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling_descriptors/baumgartner_aniline_cn_crosscoupling_descriptors.json @@ -0,0 +1 @@ +{"domain": [{"type": "CategoricalVariable", "is_objective": false, "name": "catalyst", "description": "Catalyst type", "units": null, "levels": ["tBuXPhos", "tBuBrettPhos", "AlPhos"], "ds": {"index": ["tBuXPhos", "tBuBrettPhos", "AlPhos"], "columns": [["area_cat", "DATA"], ["M2_cat", "DATA"]], "data": [[460.7543, 67.2057], [518.8408, 89.8738], [819.933, 129.0808]]}}, {"type": "CategoricalVariable", "is_objective": false, "name": "base", "description": "Base", "units": null, "levels": ["DBU", "BTMG", "TMG", "TEA"], "ds": {"index": ["TEA", "TMG", "BTMG", "DBU"], "columns": [["area", "DATA"], ["M2", "DATA"]], "data": [[162.2992, 25.8165], [165.5447, 81.4847], [227.3523, 30.554], [192.4693, 59.8367]]}}, {"type": "ContinuousVariable", "is_objective": false, "name": "base_equivalents", "description": "Base equivalents", "units": null, "bounds": [1.0, 2.5]}, {"type": "ContinuousVariable", "is_objective": false, "name": "temperature", "description": "Temperature in degrees Celsius (\u00baC)", "units": null, "bounds": [30.0, 100.0]}, {"type": "ContinuousVariable", "is_objective": false, "name": "t_res", "description": "residence time in seconds (s)", "units": null, "bounds": [60.0, 1800.0]}, 
{"type": "ContinuousVariable", "is_objective": true, "name": "yield", "description": "Yield", "units": null, "bounds": [0.0, 1.0]}], "name": "ExperimentalEmulator", "data": {"index": [], "columns": [["catalyst", "DATA"], ["base", "DATA"], ["base_equivalents", "DATA"], ["temperature", "DATA"], ["t_res", "DATA"], ["yield", "DATA"], ["computation_t", "METADATA"], ["experiment_t", "METADATA"], ["strategy", "METADATA"]], "data": []}, "experiment_params": {"model_name": "baumgartner_aniline_cn_crosscoupling_descriptors", "regressor_name": "ANNRegressor", "n_features": 7, "n_examples": 96, "descriptors_features": ["catalyst", "base"], "output_variable_names": ["yield"], "predictors": [{"input_preprocessor": {"num": {"mean_": [1.6559957171333333, 69.63333333333334, 675.2387380961666], "var_": [0.24093575016750415, 906.7308888888889, 253625.04696145264], "scale_": [0.4908520654611776, 30.11197251740392, 503.6120004144586], "n_samples_seen_": 60}}, "output_preprocessor": {"mean_": [0.5805532822851092], "var_": [0.1785550681951766], "scale_": [0.42255776906261777], "n_samples_seen_": 60}}, {"input_preprocessor": {"num": {"mean_": [1.683371605967213, 74.62295081967213, 715.8182047760656], "var_": [0.24120375329778332, 866.5532437516797, 223573.2988868711], "scale_": [0.49112498745002103, 29.43727643230059, 472.83538243967223], "n_samples_seen_": 61}}, "output_preprocessor": {"mean_": [0.6254036210660563], "var_": [0.16832977557768586], "scale_": [0.4102801184284779], "n_samples_seen_": 61}}, {"input_preprocessor": {"num": {"mean_": [1.6615540930327868, 69.30163934426228, 681.7872910763934], "var_": [0.24927125858022928, 924.5657350174685, 269346.5438305566], "scale_": [0.49927072674074263, 30.40667254103067, 518.9860728676219], "n_samples_seen_": 61}}, "output_preprocessor": {"mean_": [0.5703947171110844], "var_": [0.18538124205880477], "scale_": [0.43055922015305254], "n_samples_seen_": 61}}, {"input_preprocessor": {"num": {"mean_": [1.628605248, 72.78196721311477, 
735.1206038822951], "var_": [0.24907596673949953, 907.5027895726955, 244282.01121683227], "scale_": [0.4990751113204299, 30.124786963108892, 494.24893648528194], "n_samples_seen_": 61}}, "output_preprocessor": {"mean_": [0.6131021287750269], "var_": [0.17856050759065348], "scale_": [0.4225642052879698], "n_samples_seen_": 61}}, {"input_preprocessor": {"num": {"mean_": [1.6073154240819671, 73.0311475409836, 743.0736161352461], "var_": [0.2525966705755136, 848.4237839290512, 251362.03663802444], "scale_": [0.5025899626688873, 29.12771504819853, 501.3601865306263], "n_samples_seen_": 61}}, "output_preprocessor": {"mean_": [0.564398883872467], "var_": [0.18049929808199283], "scale_": [0.4248520896523787], "n_samples_seen_": 61}}]}, "extras": []} \ No newline at end of file diff --git a/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling_descriptors/baumgartner_aniline_cn_crosscoupling_descriptors_predictor_0.pt b/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling_descriptors/baumgartner_aniline_cn_crosscoupling_descriptors_predictor_0.pt new file mode 100644 index 00000000..1950553f Binary files /dev/null and b/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling_descriptors/baumgartner_aniline_cn_crosscoupling_descriptors_predictor_0.pt differ diff --git a/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling_descriptors/baumgartner_aniline_cn_crosscoupling_descriptors_predictor_1.pt b/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling_descriptors/baumgartner_aniline_cn_crosscoupling_descriptors_predictor_1.pt new file mode 100644 index 00000000..c6f7db4c Binary files /dev/null and b/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling_descriptors/baumgartner_aniline_cn_crosscoupling_descriptors_predictor_1.pt differ diff --git a/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling_descriptors/baumgartner_aniline_cn_crosscoupling_descriptors_predictor_2.pt 
b/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling_descriptors/baumgartner_aniline_cn_crosscoupling_descriptors_predictor_2.pt new file mode 100644 index 00000000..7483179f Binary files /dev/null and b/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling_descriptors/baumgartner_aniline_cn_crosscoupling_descriptors_predictor_2.pt differ diff --git a/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling_descriptors/baumgartner_aniline_cn_crosscoupling_descriptors_predictor_3.pt b/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling_descriptors/baumgartner_aniline_cn_crosscoupling_descriptors_predictor_3.pt new file mode 100644 index 00000000..3fb94fdb Binary files /dev/null and b/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling_descriptors/baumgartner_aniline_cn_crosscoupling_descriptors_predictor_3.pt differ diff --git a/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling_descriptors/baumgartner_aniline_cn_crosscoupling_descriptors_predictor_4.pt b/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling_descriptors/baumgartner_aniline_cn_crosscoupling_descriptors_predictor_4.pt new file mode 100644 index 00000000..1fab2856 Binary files /dev/null and b/summit/benchmarks/models/baumgartner_aniline_cn_crosscoupling_descriptors/baumgartner_aniline_cn_crosscoupling_descriptors_predictor_4.pt differ diff --git a/tests/test_benchmarks.py b/tests/test_benchmarks.py index 3a3d7262..9c13466e 100644 --- a/tests/test_benchmarks.py +++ b/tests/test_benchmarks.py @@ -103,7 +103,9 @@ def test_reizman_emulator(show_plots=False): @pytest.mark.parametrize("include_cost", [True, False]) def test_baumgartner_CC_emulator(use_descriptors, include_cost, show_plots=False): """ Test the Baumgartner Cross Coupling emulator""" - b = get_pretrained_baumgartner_cc_emulator(use_descriptors) + b = get_pretrained_baumgartner_cc_emulator( + use_descriptors=use_descriptors, include_cost=include_cost + ) 
b.parity_plot(include_test=True) if show_plots: plt.show()