From 27f90beb57de62333989a5a64f47d1759b33804e Mon Sep 17 00:00:00 2001 From: James Spencer Date: Mon, 5 Oct 2020 12:24:51 +0100 Subject: [PATCH] Initial commit. PiperOrigin-RevId: 335389185 --- CONTRIBUTING.md | 29 + LICENSE | 203 +++++ README.md | 117 +++ bin/ferminet | 384 ++++++++ ferminet/hamiltonian.py | 211 +++++ ferminet/mcmc.py | 138 +++ ferminet/mean_corrected_kfac_opt.py | 52 ++ ferminet/networks.py | 1300 +++++++++++++++++++++++++++ ferminet/qmc.py | 493 ++++++++++ ferminet/tests/elements_test.py | 121 +++ ferminet/tests/hamiltonian_test.py | 285 ++++++ ferminet/tests/mcmc_test.py | 119 +++ ferminet/tests/networks_test.py | 394 ++++++++ ferminet/tests/scf_test.py | 238 +++++ ferminet/tests/system_test.py | 100 +++ ferminet/tests/train_test.py | 268 ++++++ ferminet/tests/units_test.py | 83 ++ ferminet/train.py | 542 +++++++++++ ferminet/utils/analysis_tools.py | 114 +++ ferminet/utils/elements.py | 221 +++++ ferminet/utils/scf.py | 348 +++++++ ferminet/utils/system.py | 265 ++++++ ferminet/utils/units.py | 46 + setup.py | 52 ++ 24 files changed, 6123 insertions(+) create mode 100644 CONTRIBUTING.md create mode 100644 LICENSE create mode 100644 README.md create mode 100644 bin/ferminet create mode 100644 ferminet/hamiltonian.py create mode 100644 ferminet/mcmc.py create mode 100644 ferminet/mean_corrected_kfac_opt.py create mode 100644 ferminet/networks.py create mode 100644 ferminet/qmc.py create mode 100644 ferminet/tests/elements_test.py create mode 100644 ferminet/tests/hamiltonian_test.py create mode 100644 ferminet/tests/mcmc_test.py create mode 100644 ferminet/tests/networks_test.py create mode 100644 ferminet/tests/scf_test.py create mode 100644 ferminet/tests/system_test.py create mode 100644 ferminet/tests/train_test.py create mode 100644 ferminet/tests/units_test.py create mode 100644 ferminet/train.py create mode 100644 ferminet/utils/analysis_tools.py create mode 100644 ferminet/utils/elements.py create mode 100644 ferminet/utils/scf.py create mode 100644 ferminet/utils/system.py create mode 100644 ferminet/utils/units.py create mode 100644 setup.py diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..22b241c --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,29 @@ +# How to Contribute + +We'd love to accept your patches and contributions to this project. There are +just a few small guidelines you need to follow. + +## Contributor License Agreement + +Contributions to this project must be accompanied by a Contributor License +Agreement (CLA). You (or your employer) retain the copyright to your +contribution; this simply gives us permission to use and redistribute your +contributions as part of the project. Head over to + to see your current agreements on file or +to sign a new one. + +You generally only need to submit a CLA once, so if you've already submitted one +(even if it was for a different project), you probably don't need to do it +again. + +## Code reviews + +All submissions, including submissions by project members, require review. We +use GitHub pull requests for this purpose. Consult +[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more +information on using pull requests. + +## Community Guidelines + +This project follows +[Google's Open Source Community Guidelines](https://opensource.google/conduct/). 
diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..6b0b127 --- /dev/null +++ b/LICENSE @@ -0,0 +1,203 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..21cb2f0
--- /dev/null
+++ b/README.md
@@ -0,0 +1,117 @@
+# FermiNet: Fermionic Neural Networks
+
+An implementation of the algorithm and experiments defined in "Ab-Initio
+Solution of the Many-Electron Schroedinger Equation with Deep Neural Networks",
+David Pfau, James S. Spencer, Alex G de G Matthews and W.M.C. Foulkes, Phys.
+Rev. Research 2, 033429 (2020). FermiNet is a neural network for learning the
+ground-state wavefunctions of atoms and molecules using a variational Monte
+Carlo approach.
+
+## Installation
+
+`pip install -e .` will install all required dependencies. This is best done
+inside a [virtual environment](https://docs.python-guide.org/dev/virtualenvs/).
+
+```
+virtualenv --python=python3.7 ~/venv/ferminet
+source ~/venv/ferminet/bin/activate
+pip install -e .
+```
+
+If you have a GPU available (highly recommended for fast training), then use
+`pip install -e '.[tensorflow-gpu]'` to install TensorFlow with GPU support.
+
+We use Python 3.7 (or earlier) because there is a TensorFlow 1.15 wheel
+available for it. TensorFlow 2 is not currently supported.
+
+The tests are most easily run using pytest:
+
+```
+pip install pytest
+python -m pytest
+```
+
+## Usage
+
+```
+ferminet --batch_size 1024 --pretrain_iterations 100
+```
+
+will train FermiNet to find the ground-state wavefunction of the LiH molecule
+with a bond length of 1.63999 angstroms using a batch size of 1024 MCMC
+configurations ("walkers" in variational Monte Carlo language), and 100
+iterations of pretraining (the default of 1000 is overkill for such a small
+system). The system and hyperparameters can be controlled by flags. Run
+
+```
+ferminet --help
+```
+
+to see the available options. Several systems used in the FermiNet paper are
+included by default. Other systems can easily be set up by setting the
+appropriate system flags to `ferminet`, modifying `ferminet.utils.system` or
+writing a custom training script. For example, to run on the H2 molecule:
+
+```
+import sys
+
+from absl import logging
+from ferminet.utils import system
+from ferminet import train
+
+# Optional, for also printing training progress to STDOUT
+logging.get_absl_handler().python_handler.stream = sys.stdout
+logging.set_verbosity(logging.INFO)
+
+# Define H2 molecule
+molecule = [system.Atom('H', (0, 0, -1)), system.Atom('H', (0, 0, 1))]
+
+train.train(
+  molecule=molecule,
+  spins=(1, 1),
+  batch_size=256,
+  pretrain_config=train.PretrainConfig(iterations=100),
+  logging_config=train.LoggingConfig(result_path='H2'),
+)
+```
+
+`train.train` is controlled by several lightweight config objects. Only
+non-default settings need to be explicitly supplied. Please see the docstrings
+for `train.train` and associated `*Config` classes for details.
+
+Note: to train on larger atoms and molecules with large batch sizes, multi-GPU
+parallelisation is essential. This is supported via TensorFlow's
+[MirroredStrategy](https://www.tensorflow.org/api_docs/python/tf/distribute/MirroredStrategy)
+and the `--multi_gpu` flag.
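+For example, the following invocation (an illustrative sketch; the batch size
+shown is arbitrary) trains the default LiH system across all available GPUs:
+
+```
+ferminet --batch_size 4096 --multi_gpu
+```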
+
+## Output
+
+The results directory contains `pretrain_stats.csv` (the pretraining loss at
+each iteration), `train_stats.csv` (the local energy and MCMC acceptance
+probability at each iteration) and the `checkpoints` directory (the
+checkpoints generated during training). If requested, there is also an HDF5
+file, `data.h5`, containing the walker configurations, per-walker local
+energies and per-walker wavefunction values for each iteration. Warning: this
+quickly becomes very large!
+
+## Giving Credit
+
+If you use this code in your work, please cite the associated paper:
+
+```
+@article{ferminet,
+  title={Ab-Initio Solution of the Many-Electron Schr{\"o}dinger Equation with Deep Neural Networks},
+  author={D. Pfau and J.S. Spencer and A.G. de G. Matthews and W.M.C. Foulkes},
+  journal={Phys. Rev. Research},
+  year={2020},
+  volume={2},
+  issue = {3},
+  pages={033429},
+  doi = {10.1103/PhysRevResearch.2.033429},
+  url = {https://link.aps.org/doi/10.1103/PhysRevResearch.2.033429}
+}
+```
+
+## Disclaimer
+
+This is not an official Google product.
diff --git a/bin/ferminet b/bin/ferminet
new file mode 100644
index 0000000..b8efc7b
--- /dev/null
+++ b/bin/ferminet
@@ -0,0 +1,384 @@
+# Lint as: python3
+# Copyright 2020 DeepMind Technologies Limited. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Learn ground state wavefunctions for molecular systems using VMC."""
+
+import ast
+import datetime
+import os
+import sys
+
+from absl import app
+from absl import flags
+from absl import logging
+from ferminet import train
+from ferminet.utils import system
+import numpy as np
+import tensorflow.compat.v1 as tf
+
+FLAGS = flags.FLAGS
+
+################################################################################
+# High-level flags to configure hardware
+################################################################################
+
+flags.DEFINE_boolean(
+    'multi_gpu', False,
+    'Use all available GPUs on the machine for batch parallelism. 
' + 'Default: use single GPU.') + +################################################################################ +# High-level flags about training +################################################################################ + +flags.DEFINE_integer('batch_size', 4096, 'number of walkers') +flags.DEFINE_boolean('double_precision', False, + 'use double rather than single precision') + +################################################################################ +# Flags related to pretraining +################################################################################ + +flags.DEFINE_integer( + 'pretrain_iterations', 1000, + 'Number of iterations for which to pretrain the network ' + 'to match Hartree-Fock orbitals.') +flags.DEFINE_string('pretrain_basis', 'sto-3g', + 'Basis set used to run Hartree-Fock calculation in PySCF.') + +################################################################################ +# Flags related to optimization +################################################################################ + +flags.DEFINE_integer('iterations', 1000000, 'number of iterations') +flags.DEFINE_float('clip_el', 5.0, + 'If not none, scale at which to clip local energy') + +# Flags related to the learning rate +flags.DEFINE_float('learning_rate', 1.e-4, 'learning rate') +flags.DEFINE_float('learning_rate_decay', 1.0, + 'exponent of learning rate decay') +flags.DEFINE_float('learning_rate_delay', 10000.0, + 'set the scale of the rate decay') + +# Flags related to KFAC +flags.DEFINE_boolean('use_kfac', True, + 'If false, use ADAM, else use KFAC as optimizer') +flags.DEFINE_integer('kfac_invert_every', 1, 'See KFAC documentation') +flags.DEFINE_integer('kfac_cov_update_every', 1, 'See KFAC documentation') +flags.DEFINE_float('kfac_damping', 0.001, 'See KFAC documentation') +flags.DEFINE_float('kfac_cov_ema_decay', 0.95, 'See KFAC documentation') +flags.DEFINE_float('kfac_momentum', 0.0, 'See KFAC documentation') +flags.DEFINE_string('kfac_momentum_type', 'regular', 'See KFAC documentation') +flags.DEFINE_boolean('kfac_adapt_damping', False, 'See KFAC documentation') +flags.DEFINE_float('kfac_damping_adaptation_decay', 0.9, + 'See KFAC documentation') +flags.DEFINE_integer('kfac_damping_adaptation_interval', 5, + 'See KFAC documentation') +flags.DEFINE_float('kfac_min_damping', 1.e-4, 'See KFAC documentation') +flags.DEFINE_float('kfac_norm_constraint', 0.001, 'See KFAC documentation') + +################################################################################ +# Flags related to the system to solve for +################################################################################ + +flags.DEFINE_string('system_type', 'molecule', + 'Function to be called to create the system.') +flags.DEFINE_string( + 'system', 'LiH', 'If system_type is "molecule", the name of the molecule. ' + 'If system_type is "atom", the atomic symbol. ' + 'If system_type is "hn", the number of atoms in the ' + 'hydrogen chain.') +flags.DEFINE_integer( + 'system_charge', 0, + 'The overall charge of the system. Positive for cations ' + 'and negative for anions.') +flags.DEFINE_integer('system_dim', 3, + 'Number of dimensions of the system. Change with care.') +flags.DEFINE_string( + 'system_units', 'bohr', + 'Units of *input* coords of atoms. Either "bohr" or ' + '"angstrom". 
Internally work in a.u.; positions in '
+    'Angstroms are converted to Bohr.')
+
+# Flags related to diatomics, the hydrogen chain, and the hydrogen circle
+flags.DEFINE_float(
+    'system_separation', 0.0,
+    'For the hydrogen chain and diatomic systems, the '
+    'separation between nuclei. For the H4 circle, the radius '
+    'of the circle. For diatomics, will default to the '
+    'equilibrium bond length if set to 0.')
+
+# Flags related to the hydrogen circle
+flags.DEFINE_float('system_angle', np.pi / 4.0,
+                   'Angle from the x-axis for the H4 circle.')
+
+################################################################################
+# Flags related to the MCMC chain
+################################################################################
+
+flags.DEFINE_integer(
+    'mcmc_burn_in', 100, 'Number of burn-in steps after pretraining. '
+    'If zero, do not burn in or reinitialize walkers.')
+flags.DEFINE_integer('mcmc_steps', 10,
+                     'Number of MCMC steps to make between network updates.')
+flags.DEFINE_float(
+    'mcmc_init_width', 0.8,
+    'Width of (atom-centred) Gaussian used to generate initial '
+    'electron configurations.')
+flags.DEFINE_float('mcmc_move_width', 0.02,
+                   'Width of Gaussian used for random moves.')
+flags.DEFINE_list(
+    'mcmc_init_means', '',
+    'Iterable of 3*nelectrons giving the mean initial position '
+    'of each electron. Configurations are drawn using Gaussians '
+    'of width init_width at each 3D position. Alpha electrons '
+    'are listed before beta electrons. If empty, electrons are '
+    'assigned to atoms based upon the isolated atom spin '
+    'configuration.')
+
+################################################################################
+# Flags related to the network architecture
+################################################################################
+
+flags.DEFINE_enum(
+    'network_architecture', 'ferminet', ['ferminet', 'slater'],
+    'The choice of architecture to run the calculation with. '
+    'Either "ferminet" or "slater" for the Fermi Net and '
+    'standard Slater determinant respectively.')
+flags.DEFINE_string(
+    'hidden_units', '((256, 32), (256, 32), (256, 32), (256, 32))',
+    'Number of hidden units in each layer of the network. If '
+    'the Fermi Net with one- and two-electron streams is used, '
+    'a tuple is provided for each layer, with the first '
+    'element giving the number of hidden units in the '
+    'one-electron stream and the second element giving the '
+    'number of units in the two-electron stream.')
+flags.DEFINE_integer('determinants', 16,
+                     'Number of determinants in the Fermi Net.')
+
+flags.DEFINE_boolean(
+    'r12_en_features', True,
+    'Include r12/distance features between electrons and nuclei.')
+flags.DEFINE_boolean(
+    'r12_ee_features', True,
+    'Include r12/distance features between pairs of electrons.')
+flags.DEFINE_boolean('pos_ee_features', True,
+                     'Include electron-electron position features.')
+flags.DEFINE_boolean(
+    'use_envelope', True,
+    'Include multiplicative exponentially-decaying envelope. '
+    'Calculations will not converge if set to False.')
+flags.DEFINE_boolean(
+    'backflow', False, 'Include backflow transformation in input coordinates. '
+    'Only for use if network_architecture == "slater". '
+    'Implies --build_backflow.')
+flags.DEFINE_boolean(
+    'build_backflow', False, 'Create backflow weights but do '
+    'not include backflow coordinate transformation. 
Use to ' + 'train a Slater-Jastrow architecture and then train a ' + 'Slater-Jastrow-Backflow architecture based on it.') +flags.DEFINE_boolean('residual', True, 'Use residual connections. Recommended.') +flags.DEFINE_list( + 'after_det', '1', + 'Comma-separated configuration of neural network after the ' + 'determinants. By default, just takes a weighted sum of ' + 'determinants with no nonlinearity.') + +# Flags related to the Jastrow factor +flags.DEFINE_boolean('jastrow_en', False, + 'Include electron-nuclear Jastrow factor.') +flags.DEFINE_boolean('jastrow_ee', False, + 'Include electron-electron Jastrow factor.') +flags.DEFINE_boolean('jastrow_een', False, + 'Include electron-electron-nuclear Jastrow factor.') + +################################################################################ +# Flags related to logging, checkpointing, and restoring +################################################################################ + +flags.DEFINE_integer('stats_frequency', 1, + 'Iterations between logging of stats.') +flags.DEFINE_float('save_frequency', 10.0, + 'Minutes between saving network params.') +flags.DEFINE_string( + 'result_folder', '.', 'Path to save results and checkpoints to. A new ' + 'subdirectory will be created for every experiment. ' + 'By default, save locally.') +flags.DEFINE_string('restore_path', '', + 'Path containing checkpoint to restore network from.') +flags.DEFINE_boolean( + 'log_walkers', False, + 'Whether or not to log the values of all walkers every ' + 'iteration. Use with caution!!! Produces a lot of data ' + 'very quickly.') +flags.DEFINE_boolean( + 'log_local_energies', False, + 'Whether or not to log all local energies for each walker ' + 'at each step.') +flags.DEFINE_boolean( + 'log_wavefunction', False, + 'Whether or not to log all values of wavefunction for ' + 'each walker at each step.') + +################################################################################ +# Flags related to debugging +################################################################################ + +flags.DEFINE_boolean( + 'check_loss', False, 'Apply gradient update only if the loss is not NaN. ' + 'If true, training could be slightly slower but the ' + 'checkpoint written out when a NaN is detected will be ' + 'with the network weights which led to ' + 'the NaN.') +flags.DEFINE_boolean( + 'determinism', False, 'CPU only mode that also enforces determinism.' + 'Will run significantly slower if used.') +flags.DEFINE_integer( + 'random_seed', 1, 'Only works in determinism mode. ' + 'Set a random seed for the run.') +flags.DEFINE_string('graph_path', '', 'File to write graph to.') + + +def main(argv): + del argv + tf.enable_resource_variables() + + if FLAGS.determinism: + tf.set_random_seed(FLAGS.random_seed) + logging.info('Activating determinism mode. Expect slow performance.') + + # Create folders for logging + result_path = os.path.join( + FLAGS.result_folder, 'ferminet_results_' + + datetime.datetime.ctime(datetime.datetime.now()).replace(' ', '_')) + if not os.path.isdir(FLAGS.result_folder): + os.mkdir(FLAGS.result_folder) + os.mkdir(result_path) + + # Save the command line arguments for reproducibility + with open(os.path.join(result_path, 'flags.txt'), 'w') as f: + f.write(' '.join(sys.argv[1:]) + '\n') + + # Run function to create the system from flags. 
+ logging.info('System Type: %s', FLAGS.system_type) + logging.info('System: %s', FLAGS.system) + if FLAGS.system_type == 'molecule': + molecule, spins = system.molecule( + FLAGS.system, + bond_length=FLAGS.system_separation, + units=FLAGS.system_units) + elif FLAGS.system_type == 'atom': + molecule, spins = system.atom(FLAGS.system, charge=FLAGS.system_charge) + elif FLAGS.system_type == 'hn': + molecule, spins = system.hn( + int(FLAGS.system), + FLAGS.system_separation, + charge=FLAGS.system_charge, + units=FLAGS.system_units) + elif FLAGS.system_type == 'h4_circle': + molecule, spins = system.h4_circle( + FLAGS.system_separation, FLAGS.system_angle, units=FLAGS.system_units) + else: + raise ValueError('Not a recognized system type: %s' % FLAGS.system_type) + + network_config = train.NetworkConfig( + architecture=FLAGS.network_architecture, + hidden_units=ast.literal_eval(FLAGS.hidden_units), + determinants=FLAGS.determinants, + r12_en_features=FLAGS.r12_en_features, + r12_ee_features=FLAGS.r12_ee_features, + pos_ee_features=FLAGS.pos_ee_features, + use_envelope=FLAGS.use_envelope, + backflow=FLAGS.backflow, + build_backflow=FLAGS.build_backflow, + residual=FLAGS.residual, + after_det=tuple(int(x) for x in FLAGS.after_det), + jastrow_en=FLAGS.jastrow_en, + jastrow_ee=FLAGS.jastrow_ee, + jastrow_een=FLAGS.jastrow_een, + ) + + pretrain_config = train.PretrainConfig( + iterations=FLAGS.pretrain_iterations, + basis=FLAGS.pretrain_basis, + ) + + optim_config = train.OptimConfig( + iterations=FLAGS.iterations, + learning_rate=FLAGS.learning_rate, + learning_rate_decay=FLAGS.learning_rate_decay, + learning_rate_delay=FLAGS.learning_rate_delay, + clip_el=FLAGS.clip_el, + use_kfac=FLAGS.use_kfac, + check_loss=FLAGS.check_loss, + deterministic=FLAGS.determinism, + ) + + kfac_config = train.KfacConfig( + invert_every=FLAGS.kfac_invert_every, + cov_update_every=FLAGS.kfac_cov_update_every, + damping=FLAGS.kfac_damping, + cov_ema_decay=FLAGS.kfac_cov_ema_decay, + momentum=FLAGS.kfac_momentum, + momentum_type=FLAGS.kfac_momentum_type, + adapt_damping=FLAGS.kfac_adapt_damping, + damping_adaptation_decay=FLAGS.kfac_damping_adaptation_decay, + damping_adaptation_interval=FLAGS.kfac_damping_adaptation_interval, + min_damping=FLAGS.kfac_min_damping, + ) + + mcmc_config = train.MCMCConfig( + burn_in=FLAGS.mcmc_burn_in, + steps=FLAGS.mcmc_steps, + init_width=FLAGS.mcmc_init_width, + move_width=FLAGS.mcmc_move_width, + init_means=tuple(float(x) for x in FLAGS.mcmc_init_means), + ) + + logging_config = train.LoggingConfig( + result_path=result_path, + save_frequency=FLAGS.save_frequency, + restore_path=FLAGS.restore_path, + stats_frequency=FLAGS.stats_frequency, + walkers=FLAGS.log_walkers, + wavefunction=FLAGS.log_wavefunction, + local_energy=FLAGS.log_local_energies, + config={ + 'system_type': FLAGS.system_type, + 'system': FLAGS.system, + 'system_units': FLAGS.system_units, + 'system_separation': FLAGS.system_separation, + 'system_charge': FLAGS.system_charge, + }, + ) + + train.train( + molecule=molecule, + spins=spins, + batch_size=FLAGS.batch_size, + network_config=network_config, + pretrain_config=pretrain_config, + optim_config=optim_config, + kfac_config=kfac_config, + mcmc_config=mcmc_config, + logging_config=logging_config, + multi_gpu=FLAGS.multi_gpu, + double_precision=FLAGS.double_precision, + graph_path=FLAGS.graph_path) + logging.info('Fermi Net training run completed successfully.') + + +if __name__ == '__main__': + app.run(main) diff --git a/ferminet/hamiltonian.py b/ferminet/hamiltonian.py 
new file mode 100644
index 0000000..c2141a3
--- /dev/null
+++ b/ferminet/hamiltonian.py
@@ -0,0 +1,211 @@
+# Lint as: python3
+# Copyright 2018 DeepMind Technologies Limited. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Utilities for constructing Hamiltonians and setting up VMC calculations."""
+
+import numpy as np
+import tensorflow.compat.v1 as tf
+
+
+def kinetic_from_log(f, x):
+  r"""Compute -1/2 \nabla^2 \psi / \psi from log|psi|."""
+  with tf.name_scope('kinetic_from_log'):
+    df = tf.gradients(f, x)[0]
+    lapl_elem = tf.map_fn(
+        lambda i: tf.gradients(tf.expand_dims(df[..., i], -1), x)[0][..., i],
+        tf.range(x.shape[1]),
+        dtype=x.dtype.base_dtype,
+        back_prop=False,
+        parallel_iterations=20)
+    # For f = log|psi|, \nabla^2 \psi / \psi = \nabla^2 f + |\nabla f|^2.
+    lapl = (tf.reduce_sum(lapl_elem, axis=0) +
+            tf.reduce_sum(df**2, axis=-1))
+    return -0.5*tf.expand_dims(lapl, -1)
+
+
+def operators(atoms,
+              nelectrons,
+              potential_epsilon=0.0):
+  """Creates kinetic and potential operators of Hamiltonian in atomic units.
+
+  Args:
+    atoms: list of Atom objects for each atom in the system.
+    nelectrons: number of electrons
+    potential_epsilon: Epsilon used to smooth the divergence of the 1/r
+      potential near the origin for algorithms with numerical stability issues.
+
+  Returns:
+    Functions that generate the kinetic and potential energy as TF ops.
+  """
+  vnn = 0.0
+  for i, atom_i in enumerate(atoms):
+    for atom_j in atoms[i+1:]:
+      qij = float(atom_i.charge * atom_j.charge)
+      vnn += qij / np.linalg.norm(atom_i.coords_array - atom_j.coords_array)
+
+  def smooth_norm(x):
+    with tf.name_scope('smooth_norm'):
+      if potential_epsilon == 0.0:
+        return tf.norm(x, axis=1, keepdims=True)
+      else:
+        return tf.sqrt(tf.reduce_sum(x**2 + potential_epsilon**2,
+                                     axis=1,
+                                     keepdims=True))
+
+  def nuclear_potential(xs):
+    """Calculates nuclear potential for set of electron positions."""
+    with tf.name_scope('Vne'):
+      v = []
+      for atom in atoms:
+        charge = tf.constant(atom.charge, dtype=xs[0].dtype)
+        coords = tf.constant(atom.coords, dtype=xs[0].dtype)
+        v.extend([-charge / smooth_norm(coords - x) for x in xs])
+      return tf.add_n(v)
+
+  def electronic_potential(xs):
+    """Calculates electronic potential for set of electron positions."""
+    with tf.name_scope('Vee'):
+      if len(xs) > 1:
+        v = []
+        for (i, ri) in enumerate(xs):
+          v.extend([
+              1.0 / smooth_norm(ri - rj) for rj in xs[i + 1:]
+          ])
+        return tf.add_n(v)
+      else:
+        return 0.0
+
+  def nuclear_nuclear(dtype):
+    """Calculates the nuclear-nuclear interaction contribution."""
+    with tf.name_scope('Vnn'):
+      return tf.constant(vnn, dtype=dtype)
+
+  def potential(x):
+    """Calculates the total potential energy at each electron position."""
+    xs = tf.split(x, nelectrons, axis=1)
+    return (nuclear_potential(xs) + electronic_potential(xs) +
+            nuclear_nuclear(xs[0].dtype))
+
+  return kinetic_from_log, potential
+
+
+def exact_hamiltonian(atoms,
+                      nelectrons,
+                      potential_epsilon=0.0):
+  """Construct function that evaluates exact Hamiltonian of a system.
+
+  Args:
+    atoms: list of Atom objects for each atom in the system.
+    nelectrons: number of electrons
+    potential_epsilon: Epsilon used to smooth the divergence of the 1/r
+      potential near the origin for algorithms with numerical stability issues.
+
+  Returns:
+    A function that generates the wavefunction and Hamiltonian ops.
+  """
+  k_fn, v_fn = operators(atoms, nelectrons, potential_epsilon)
+  def _hamiltonian(f, x):
+    # kinetic_from_log computes -1/2 \nabla^2 \psi / \psi, so multiply the sum
+    # of the kinetic and potential terms by psi to obtain H psi.
+    logpsi, signpsi = f(x)
+    psi = tf.exp(logpsi) * signpsi
+    hpsi = psi * (k_fn(logpsi, x) + v_fn(x))
+    return psi, hpsi
+  return _hamiltonian
+
+
+def r12_features(x, atoms, nelectrons, keep_pos=True, flatten=False,
+                 atomic_coords=False):
+  """Adds physically-motivated features depending upon electron distances.
+
+  The tensor of electron positions is extended to include the distance of each
+  electron to each nucleus and the distance between each pair of electrons.
+
+  Args:
+    x: electron positions. Tensor either of shape (batch_size,
+      nelectrons*ndim), or (batch_size, nelectrons, ndim), where ndim is the
+      dimensionality of the system (usually 3).
+    atoms: list of Atom objects for each atom in the system.
+    nelectrons: number of electrons.
+    keep_pos: If true, includes the original electron positions in the output.
+    flatten: If true, return the distances as a flat vector for each element of
+      the batch. If false, return the atom-electron distances and
+      electron-electron distances each as 3D arrays.
+    atomic_coords: If true, replace the original position of the electrons with
+      the position of the electrons relative to all atoms.
+
+  Returns:
+    If flatten is true, keep_pos is true and atomic_coords is false:
+      tensor of shape (batch_size, ndim*Ne + Ne*Na + Ne(Ne-1)/2), where Ne (Na)
+      is the number of electrons (atoms). The first ndim*Ne terms are the
+      original x, the next Ne terms are |x_i - R_1|, where R_1 is the position
+      of the first nucleus (and so on for each atom), and the remaining terms
+      are |x_i - x_j| for each (i,j) pair, where i and j run over all
+      electrons, with i varied slowest.
+    If flatten is true and keep_pos is false: the same, but without the first
+      ndim*Ne features.
+    If flatten is false and keep_pos is false: tensors of shape
+      (batch_size, Ne, Na) and (batch_size, Ne, Ne).
+    If flatten is false and keep_pos is true: same as above, and also a tensor
+      of size (batch_size, Ne, ndim).
+    If atomic_coords is true: the same as if keep_pos is true, except the
+      ndim*Ne coordinates corresponding to the original positions are replaced
+      by ndim*Ne*Na coordinates corresponding to the differences from each
+      electron position to each atomic position.
+ """ + with tf.name_scope('r12_features'): + if len(x.shape) == 2: + xs = tf.reshape(x, [x.shape[0], nelectrons, -1]) + else: + xs = x + coords = tf.stack([ + tf.constant(atom.coords, dtype=x.dtype.base_dtype) for atom in atoms + ]) + coords = tf.expand_dims(tf.expand_dims(coords, 0), 0) + xsa = tf.expand_dims(xs, 2) - coords # xs in atomic coordinates + + r_ae = tf.norm(xsa, axis=-1) + r_ee = np.zeros((nelectrons, nelectrons), dtype=object) + for i in range(nelectrons): + for j in range(i+1, nelectrons): + r_ee[i, j] = tf.norm(xs[:, i, :] - xs[:, j, :], axis=1, keepdims=True) + + if flatten: + r_ae = tf.reshape(r_ae, [r_ae.shape[0], -1]) + if nelectrons > 1: + r_ee = tf.concat( + r_ee[np.triu_indices(nelectrons, k=1)].tolist(), axis=1) + else: + r_ee = tf.zeros([r_ae.shape[0], 0]) + if keep_pos: + if atomic_coords: + xsa = tf.reshape(xsa, [xsa.shape[0], -1]) + return tf.concat([r_ae, r_ee, xsa], axis=1) + else: + return tf.concat([r_ae, r_ee, x], axis=1) + else: + return tf.concat([r_ae, r_ee], axis=1) + else: + zeros_like = tf.zeros((xs.shape[0], 1), dtype=x.dtype.base_dtype) + for i in range(nelectrons): + r_ee[i, i] = zeros_like + for j in range(i): + r_ee[i, j] = r_ee[j, i] + r_ee = tf.transpose(tf.stack(r_ee.tolist()), [2, 0, 1, 3]) + if keep_pos: + if atomic_coords: + return r_ae, r_ee, xsa + else: + return r_ae, r_ee, xs + else: + return r_ae, r_ee diff --git a/ferminet/mcmc.py b/ferminet/mcmc.py new file mode 100644 index 0000000..723e0c0 --- /dev/null +++ b/ferminet/mcmc.py @@ -0,0 +1,138 @@ +# Lint as: python3 +# Copyright 2020 DeepMind Technologies Limited. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Simple Markov Chain Monte Carlo data generator.""" + +import functools +import tensorflow.compat.v1 as tf + + +class MCMC: + """Simple Markov Chain Monte Carlo implementation for sampling from |f|^2.""" + + def __init__(self, + network, + batch_size, + init_mu, + init_sigma, + move_sigma, + dtype=tf.float32): + """Constructs MCMC object. + + Args: + network: network to sample from according to the square of the output. + batch_size: batch size - number of configurations (walkers) to generate. + init_mu: mean of a truncated normal distribution to draw from to generate + initial configurations for each coordinate (i.e. of length 3 x number of + electrons). + distribution. + init_sigma: standard deviation of a truncated normal distribution to draw + from to generate initial configurations. + move_sigma: standard deviation of random normal distribution from which to + draw random moves. + dtype: the data type of the configurations. 
+ """ + + strategy = tf.distribute.get_strategy() + nreplicas = strategy.num_replicas_in_sync + self._dtype = dtype.base_dtype + self.network = network + self._init_mu = init_mu + self._init_sigma = init_sigma + self._move_sigma = move_sigma + self.walkers = tf.get_variable( + 'walkers', + initializer=self._rand_init((nreplicas, batch_size)), + trainable=False, + use_resource=True, + dtype=self._dtype) + self.psi = self.update_psi() + self.move_acc = tf.constant(0.0) + + def _rand_init(self, batch_dims): + """Generate a random set of samples from a uniform distribution.""" + return tf.concat( + [ + tf.random.truncated_normal( # pylint: disable=g-complex-comprehension + shape=(*batch_dims, 1), + mean=mu, + stddev=self._init_sigma, + dtype=self._dtype) for mu in self._init_mu + ], + axis=-1) + + def reinitialize_walkers(self): + walker_reset = self.walkers.assign( + self._rand_init(self.walkers.shape.as_list()[:-1])) + with tf.control_dependencies([walker_reset]): + psi_reset = self.update_psi() + return walker_reset, psi_reset + + def update_psi(self): + strategy = tf.distribute.get_strategy() + psi_per_gpu = strategy.experimental_run( + lambda: self.network(self.walkers_per_gpu)[0]) + self.psi = tf.stack( + strategy.experimental_local_results(psi_per_gpu), axis=0) + return self.psi + + @property + def walkers_per_gpu(self): + replica_id = tf.distribute.get_replica_context().replica_id_in_sync_group + return self.walkers[replica_id] + + @property + def psi_per_gpu(self): + replica_id = tf.distribute.get_replica_context().replica_id_in_sync_group + return self.psi[replica_id] + + def step(self): + """Returns ops for one Metropolis-Hastings step across all replicas.""" + strategy = tf.distribute.get_strategy() + walkers, psi, move_acc = strategy.experimental_run( + functools.partial(self._mh_step, step_size=self._move_sigma)) + update_walkers = self.walkers.assign( + tf.stack(strategy.experimental_local_results(walkers), axis=0)) + self.psi = tf.stack(strategy.experimental_local_results(psi), axis=0) + self.move_acc = tf.reduce_mean( + strategy.experimental_local_results(move_acc)) + return update_walkers, self.psi, self.move_acc + + def _mh_step(self, step_size): + """Returns ops for one Metropolis-Hastings step in a replica context.""" + walkers, psi = self.walkers_per_gpu, self.psi_per_gpu + new_walkers = walkers + tf.random.normal( + shape=walkers.shape, stddev=step_size, dtype=self._dtype) + new_psi = self.network(new_walkers)[0] + pmove = tf.squeeze(2 * (new_psi - psi)) + pacc = tf.log( + tf.random_uniform(shape=walkers.shape.as_list()[:1], dtype=self._dtype)) + decision = tf.less(pacc, pmove) + with tf.control_dependencies([decision]): + new_walkers = tf.where(decision, new_walkers, walkers) + new_psi = tf.where(decision, new_psi, psi) + move_acc = tf.reduce_mean(tf.cast(decision, tf.float32)) + return new_walkers, new_psi, move_acc + + def stats_ops(self, log_walkers=False): + """Returns op for evaluating the move acceptance probability. + + Args: + log_walkers: also include the complete walker configurations (slow, lots + of data). + """ + stats = {'pmove': self.move_acc} + if log_walkers: + stats['walkers'] = self.walkers + return stats diff --git a/ferminet/mean_corrected_kfac_opt.py b/ferminet/mean_corrected_kfac_opt.py new file mode 100644 index 0000000..dfb9401 --- /dev/null +++ b/ferminet/mean_corrected_kfac_opt.py @@ -0,0 +1,52 @@ +# Lint as: python3 +# Copyright 2019 DeepMind Technologies Limited. All Rights Reserved. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Extension to KFAC that adds correction term for non-normalized densities."""
+
+import kfac
+
+
+ip_p = kfac.utils.ip_p
+sum_p = kfac.utils.sum_p
+sprod_p = kfac.utils.sprod_p
+
+
+class MeanCorrectedKfacOpt(kfac.PeriodicInvCovUpdateKfacOpt):
+  """Like KFAC, but with all moments mean-centered."""
+
+  def __init__(self, *args, **kwargs):
+    """batch_size is mandatory, estimation_mode='exact' or 'exact_GGN'."""
+
+    kwargs["compute_params_stats"] = True
+
+    if not kwargs["estimation_mode"].startswith("exact"):  # pytype: disable=attribute-error
+      raise ValueError("estimation_mode must be 'exact' or 'exact_GGN'.")
+
+    super(MeanCorrectedKfacOpt, self).__init__(*args, **kwargs)
+
+  def _multiply_preconditioner(self, vecs_and_vars):
+
+    # Create a w list with the same order as vecs_and_vars
+    w_map = {var: ps for (ps, var) in zip(self.params_stats,
+                                          self.registered_variables)}
+    w = tuple((w_map[var], var) for (_, var) in vecs_and_vars)
+
+    r = super(MeanCorrectedKfacOpt, self)._multiply_preconditioner(
+        vecs_and_vars)
+    u = super(MeanCorrectedKfacOpt, self)._multiply_preconditioner(w)
+
+    scalar = ip_p(w, r) / (1. + ip_p(w, w))
+
+    return sum_p(r, sprod_p(scalar, u))
diff --git a/ferminet/networks.py b/ferminet/networks.py
new file mode 100644
index 0000000..09d3cda
--- /dev/null
+++ b/ferminet/networks.py
@@ -0,0 +1,1300 @@
+# Lint as: python3
+# Copyright 2018 DeepMind Technologies Limited. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Network building for quantum Monte Carlo calculations."""
+
+# Using networks with MCMC and optimisation
+#
+# We create different instances of the same model, all using the same
+# parameters via Sonnet's variable sharing: one to run pre-training, one to
+# sample electron configurations using MCMC and one for use with the
+# optimiser. This requires care when using K-FAC: normally one would only
+# have multiple registrations of network instances if using multi-tower mode,
+# which we aren't. K-FAC detects variable re-use from the variable scope's
+# reuse flag, which Sonnet sets to true when we create additional instances
+# of the model, so layer registration results in errors because we're not
+# using multi-tower. Thus we override the defaults by explicitly setting
+# reuse=False in all layer registration calls.
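+#
+# As an illustrative sketch only (layer sizes and tensor names here are
+# hypothetical, not part of this module), variable sharing means one set of
+# weights backs several graph instances:
+#
+#   net = TensorMlp([64, 64])   # weights created on first call
+#   psi_mcmc = net(x_mcmc)      # instance used for MCMC sampling
+#   psi_optim = net(x_optim)    # second instance, same weights
+#
+# Each such call would register its layers with K-FAC again (when a
+# layer_collection is passed), which is why the registration calls below pass
+# reuse=False explicitly.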
+
+# pylint: disable=g-long-lambda
+
+import functools
+import typing
+
+from ferminet import hamiltonian
+import sonnet as snt
+import tensorflow.compat.v1 as tf
+
+
+def extend(x, y):
+  """Add extra dimensions so x can be broadcast with y."""
+  if isinstance(x, float):
+    return x
+  rank = y.shape.rank
+  rank_ = x.shape.rank
+  assert rank_ <= rank
+  for _ in range(rank - rank_):
+    x = tf.expand_dims(x, -1)
+  return x
+
+
+def flatten(seq):
+  """Recursively flattens a sequence."""
+  lst = []
+  for el in seq:
+    if isinstance(el, (list, tuple)):
+      lst.extend(flatten(el))
+    else:
+      lst.append(el)
+  return lst
+
+
+@tf.custom_gradient
+def sign_det(x):
+  """Sign of determinant op whose gradient doesn't crash when the sign is 0."""
+  with tf.name_scope('sign_det'):
+    sign = tf.linalg.slogdet(x)[0]
+
+    def grad(_):
+      return tf.zeros_like(x)
+
+    return sign, grad
+
+
+@tf.custom_gradient
+def determinant(x):
+  """Determinant op with well-defined gradient for singular matrices."""
+  with tf.name_scope('determinant'):
+    with tf.device('/cpu:0'):
+      s, u, v = tf.linalg.svd(x)
+
+    sign = sign_det(x)
+
+    def grad(dx):
+      ss = tf.tile(s[..., None], len(s.shape) * [1] + [s.shape[-1]])
+      ss = tf.linalg.set_diag(ss, tf.ones_like(s))
+      z = tf.reduce_prod(ss, axis=-2)
+      adj = tf.matmul(u, tf.matmul(tf.linalg.diag(z), v, transpose_b=True))
+      signdx = tf.expand_dims(tf.expand_dims(sign * dx, -1), -1)
+      return signdx * adj
+
+    return sign * tf.reduce_prod(s, axis=-1), grad
+
+
+@tf.custom_gradient
+def log_determinant(x):
+  """Log-determinant op with well-defined gradient for singular matrices."""
+  with tf.name_scope('log_determinant'):
+    with tf.device('/cpu:0'):
+      s, u, v = tf.linalg.svd(x)
+
+    sign = sign_det(x)
+
+    def grad(dsign, dlog):
+      del dsign
+      # d ln|det(X)|/dX = transpose(X^{-1}). This uses pseudo-inverse via SVD
+      # and so is not numerically stable...
+      adj = tf.matmul(u,
+                      tf.matmul(tf.linalg.diag(1.0 / s), v, transpose_b=True))
+      return tf.expand_dims(tf.expand_dims(dlog, -1), -1) * adj
+
+    return (sign, tf.reduce_sum(tf.log(s), axis=-1)), grad
+
+
+def cofactor(u, s, v, shift=0.0):
+  r"""Calculates the cofactor matrix from singular value decomposition.
+
+  Given M = U S V*, where * indicates the conjugate transpose, the cofactor
+  of M, M^C = det(M) M^{-T}, is given by M^C = det(U) det(V) U \Gamma V^T,
+  where \Gamma_{ij} = \delta_{ij} \prod_{k!=i} s_k, s_k is the k-th singular
+  value of M. The latter equation for M^C is well behaved even for singular
+  matrices.
+
+  Note:
+
+  det(U), det(V) \in {-1, +1} are calculated as part of the determinant
+  evaluation and so not repeated here.
+
+  Args:
+    u: unitary matrix, U, from SVD.
+    s: singular values, S, from SVD.
+    v: unitary matrix, V, from SVD.
+    shift (optional): constant to subtract in log domain for stability.
+
+  Returns:
+    cofactor matrix up to the sign given by det(U) det(V), i.e.
+    M^C / (det(U) det(V)) = U \Gamma V^T, divided by exp(shift).
+  """
+  return tf.matmul(
+      u, tf.matmul(tf.linalg.diag(gamma(s, shift)), v, transpose_b=True))
+
+
+def gamma(s, shift=0.0):
+  r"""Calculates diagonal of \Gamma_{ij} = \delta_{ij} \prod_{k!=i} s_k.
+
+  Args:
+    s: singular values of matrix.
+    shift (optional): constant to subtract in log domain for stability.
+
+  Returns:
+    Diagonal of \Gamma over the outer dimension of s, divided by exp(shift).
+ """ + ls = tf.log(s) + lower = tf.cumsum(ls, axis=-1, exclusive=True) + upper = tf.cumsum(ls, axis=-1, exclusive=True, reverse=True) + return tf.exp(lower + upper - shift) + + +def rho(s, shift=0.0): + r"""Calculates \rho_ij = \prod_{k!=i,j} s_k, \rho_ii = 0. + + Args: + s: singular values of matrix. + shift (optional): constant to subtract in log domain for stability. + + Returns: + \rho, the first derivative of \Gamma (see `gamma`), divided by exp(shift). + """ + # Product of singular elements before and after two indices, i, j, + # i.e. v_{ij} = \Prod_{kj} s_i s_j -- outer product of forward and + # reverse cumulative products. Valid only for j>i. + # Computed in log domain for numerical stability. + ls = tf.log(s) + lower_cumsum = tf.cumsum(ls, axis=-1, exclusive=True) + upper_cumsum = tf.cumsum(ls, axis=-1, reverse=True, exclusive=True) + v = tf.expand_dims(lower_cumsum, -1) + tf.expand_dims(upper_cumsum, -2) + + # Product of singular elements between two indices, \Prod_{i 1: + s1, u1, v1 = tf.linalg.svd(x1) + if x2.shape[-1] > 1: + s2, u2, v2 = tf.linalg.svd(x2) + + # Computing the sign of the determinant u and v instead of x means this + # works even with singular matrices. + if x1.shape[-1] > 1: + sign1 = sign_det(u1) * sign_det(v1) + logdet1 = tf.reduce_sum(tf.log(s1), axis=-1) + else: + sign1 = tf.sign(x1[..., 0, 0]) + logdet1 = tf.log(tf.abs(x1[..., 0, 0])) + + if x2.shape[-1] > 1: + sign2 = sign_det(u2) * sign_det(v2) + logdet2 = tf.reduce_sum(tf.log(s2), axis=-1) + else: + sign2 = tf.sign(x2[..., 0, 0]) + logdet2 = tf.log(tf.abs(x2[..., 0, 0])) + + sign = sign1 * sign2 + logdet = logdet1 + logdet2 + + logdet_max1 = tf.reduce_max(logdet1, axis=-1, keepdims=True) + logdet_max2 = tf.reduce_max(logdet2, axis=-1, keepdims=True) + det = tf.exp(logdet - logdet_max1 - logdet_max2) * sign + output = tf.matmul(det, w) + sign_out = tf.sign(output) + log_out = tf.log(tf.abs(output)) + logdet_max1 + logdet_max2 + + @tf.custom_gradient + def logdet_matmul_grad(x1, x2, w, grad_log): + """Numerically stable gradient of log(abs(sum_i w_i |x1_i| * |x2_i|)). + + Args: + x1: Tensor of shape [batch size, dets in, n, n]. + x2: Tensor of shape [batch size, dets in, m, m]. + w: Weight matrix of shape [dets in, dets out]. + grad_log: Tensor of shape [batch_size, dets_out] by which to + left-multiply the Jacobian (aka the reverse sensitivity). + + Returns: + Ops that compute the gradient of log(abs(matmul(det(x1) * det(x2), w))). + The ops give the gradients with respect to the inputs x1, x2, w. + """ + # This is missing a factor of exp(-logdet_max1-logdet_max2), which is + # instead picked up by including it in the terms adj1*det2 and adj2*det1 + glog_out = grad_log / output + dout = tf.matmul(glog_out, w, transpose_b=True) + + if x1.shape[-1] > 1: + adj1 = cofactor(u1, s1, v1, extend(logdet_max1, s1)) * sign1[..., None, + None] + else: + adj1 = tf.ones_like(x1) * extend(tf.exp(-logdet_max1), x1) + + if x2.shape[-1] > 1: + adj2 = cofactor(u2, s2, v2, extend(logdet_max2, s2)) * sign2[..., None, + None] + else: + adj2 = tf.ones_like(x2) * extend(tf.exp(-logdet_max2), x2) + + det1 = tf.exp(logdet1 - logdet_max1) * sign1 + det2 = tf.exp(logdet2 - logdet_max2) * sign2 + + dx1 = adj1 * (det2 * dout)[..., None, None] + dx2 = adj2 * (det1 * dout)[..., None, None] + dw = tf.matmul(det, glog_out, transpose_a=True) + + def grad(grad_dx1, grad_dx2, grad_dw): + """Stable gradient of gradient of log(abs(sum_i w_i |x1_i| * |x2_i|)). + + Args: + grad_dx1: Tensor of shape [batch size, dets in, n, n]. 
+ grad_dx2: Tensor of shape [batch size, dets in, m, m]. + grad_dw: Tensor of shape [dets in, dets out]. + + Returns: + Ops that compute the gradient of the gradient of + log(abs(matmul(det(x1) * det(x2), w))). The ops give the gradients + with respect to the outputs of the gradient op dx1, dx2, dw and the + reverse sensitivity grad_log. + """ + # Terms that appear repeatedly in different gradients. + det_grad_dw = tf.matmul(det, grad_dw) + # Missing a factor of exp(-2logdet_max1-2logdet_max2) which is included\ + # via terms involving r1, r2, det1, det2, adj1, adj2 + glog_out2 = grad_log / (output**2) + + adj_dx1 = tf.reduce_sum(adj1 * grad_dx1, axis=[-1, -2]) + adj_dx2 = tf.reduce_sum(adj2 * grad_dx2, axis=[-1, -2]) + + adj_dx1_det2 = det2 * adj_dx1 + adj_dx2_det1 = det1 * adj_dx2 + adj_dx = adj_dx2_det1 + adj_dx1_det2 + + if x1.shape[-1] > 1: + r1 = rho(s1, logdet_max1) + dadj1 = (grad_cofactor(u1, v1, r1, grad_dx1) * sign1[..., None, None]) + else: + dadj1 = tf.zeros_like(x1) + + if x2.shape[-1] > 1: + r2 = rho(s2, logdet_max2) + dadj2 = (grad_cofactor(u2, v2, r2, grad_dx2) * sign2[..., None, None]) + else: + dadj2 = tf.zeros_like(x2) + + # Computes gradients wrt x1 and x2. + ddout_w = ( + tf.matmul(glog_out, grad_dw, transpose_b=True) - + tf.matmul(glog_out2 * det_grad_dw, w, transpose_b=True)) + ddout_x = tf.matmul( + glog_out2 * tf.matmul(adj_dx, w), w, transpose_b=True) + + grad_x1 = ( + adj1 * (det2 * + (ddout_w - ddout_x) + adj_dx2 * dout)[..., None, None] + + dadj1 * (dout * det2)[..., None, None]) + grad_x2 = ( + adj2 * (det1 * + (ddout_w - ddout_x) + adj_dx1 * dout)[..., None, None] + + dadj2 * (dout * det1)[..., None, None]) + + adj_dx_w = tf.matmul(adj_dx, w) + + # Computes gradient wrt w. + grad_w = ( + tf.matmul(adj_dx, glog_out, transpose_a=True) - tf.matmul( + det, glog_out2 * (det_grad_dw + adj_dx_w), transpose_a=True)) + + # Computes gradient wrt grad_log. + grad_grad_log = (det_grad_dw + adj_dx_w) / output + return grad_x1, grad_x2, grad_w, grad_grad_log + + return (dx1, dx2, dw), grad + + def grad(grad_log, grad_sign): + """Computes gradient wrt log(w*det(x1)*det(x2)).""" + del grad_sign + return logdet_matmul_grad(x1, x2, w, grad_log) + + return (log_out, sign_out), grad + + +class TensorMlp(snt.AbstractModule): + """Regular MLP, but works with rank-2, -3, or -4 tensors as input.""" + + def __init__(self, + layers, + rank=0, + use_bias=True, + activate_final=True, + residual=False, + gain=1.0, + stddev=1.0, + name='tensor_mlp'): + """Create an MLP with different input ranks. + + Args: + layers: list of ints, with number of units in each layer + rank: Rank of the input, not counting batch or feature dimension. If 0, + creates a normal MLP. If 1, uses 1D convolutions with kernel size 1. If + 2, uses 2D convolutions with 1x1 kernel. + use_bias: If false, leave bias out of linear layers. + activate_final: Boolean. If true, add nonlinearity to last layer. + residual: use residual connections in each layer. + gain: gain used in orthogonal initializer for weights in each linear + layer. + stddev: standard deviation used in random normal initializer for biases in + each linear layer. + name: String. Sonnet default. + + Raises: + ValueError: if the rank is not 0, 1 or 2. 
+ """ + super(TensorMlp, self).__init__(name=name) + initializers = {'w': tf.initializers.orthogonal(gain=gain)} + if use_bias: + initializers['b'] = tf.initializers.random_normal(stddev=stddev) + if rank == 0: + self._linear = lambda x: snt.Linear( + x, use_bias=use_bias, initializers=initializers) + elif rank == 1: + self._linear = lambda x: snt.Conv1D( + output_channels=x, + kernel_shape=1, + use_bias=use_bias, + initializers=initializers) + elif rank == 2: + self._linear = lambda x: snt.Conv2D( + output_channels=x, + kernel_shape=1, + use_bias=use_bias, + initializers=initializers) + else: + raise ValueError('Rank for TensorMlp must be 0, 1 or 2.') + self._rank = rank + self._nlayers = layers + self._use_bias = use_bias + self._activate_final = activate_final + self._use_residual = residual + + def _build(self, inputs, layer_collection=None): + if self._nlayers: + x = inputs + self._layers = [] + for i, h in enumerate(self._nlayers): + self._layers.append(self._linear(h)) + y = self._layers[-1](x) + if layer_collection: + if self._use_bias: + params = (self._layers[-1].w, self._layers[-1].b) + else: + params = self._layers[-1].w + + if self._rank == 0: + layer_collection.register_fully_connected(params, x, y, reuse=False) + elif self._rank == 1: + layer_collection.register_conv1d( + params, [1, 1, 1], 'SAME', x, y, reuse=False) + else: + layer_collection.register_conv2d( + params, [1, 1, 1, 1], 'SAME', x, y, reuse=False) + + if i < len(self._nlayers) - 1 or self._activate_final: + y = tf.nn.tanh(y) + if self._use_residual and x.shape[-1].value == h: + residual = x + else: + residual = 0.0 + x = y + residual + return x + return inputs # if layers is an empty list, just act as the identity + + +class ParallelNet(snt.AbstractModule): + """A symmetric network with parallel streams for rank 1 and 2 features. + + Rank 2 features propagate with parallel independent channels. + Rank 1 features are able to input from other channels and rank 2 stream. + + Attributes: + shape: number of alpha and beta electrons. + layers: iterable containing integers pairs containing description of layers- + i.e tuples containing (num_rank_one_features, num_rank_two_features) + use_last_layer: bool. whether or not return rank-2 features at end of net. + residual: bool. whether or not to use residual connections. + name: for Sonnet namespacing. + """ + + def __init__(self, + shape, + layers, + use_last_layer=True, + residual=False, + name='parallel_net'): + super(ParallelNet, self).__init__(name=name) + self._shape = shape # tuple of input shapes + self._n = sum(shape) + self._layers = layers + self._use_last_layer = use_last_layer + self._use_residual = residual + + def _build(self, x1, x2, r_ee, layer_collection=None): + """Construct the parallel network from rank 1 and rank 2 inputs. + + Args: + x1: rank 1 input features. tensor of shape (batch, n_electrons, + n_rank_one_inputs) + x2: rank 2 input features. tensor of shape (batch, n_electrons, + n_electrons, n_rank_two_inputs) + r_ee: distances between electrons. tensor of shape (batch, n_electrons, + n_electrons) + layer_collection: KFAC layer collection or None. + + Returns: + x1: rank 1 output features. + tensor of shape (batch, n_electrons, n_rank_one_outputs) + x2: rank 2 output features or None if use_last_layer is False. 
+ if not None tensor is of shape (batch, n_electrons, n_rank_two_outputs) + """ + self._w1 = [] + self._w2 = [] + + initializers = { + 'w': tf.initializers.orthogonal(), + 'b': tf.initializers.random_normal() + } + for i, (nc1, nc2) in enumerate(self._layers): + xs1 = tf.split(x1, self._shape, axis=1) + xs2 = tf.split(x2, self._shape, axis=1) + + # mean field features + mf1 = [ + tf.tile( + tf.reduce_mean(x, axis=1, keepdims=True), [1, x1.shape[1], 1]) + for x in xs1 + ] + mf2 = [tf.reduce_mean(x, axis=1) for x in xs2] + y1 = tf.concat([x1] + mf1 + mf2, axis=2) + + self._w1.append( + snt.Conv1D( + output_channels=nc1, kernel_shape=1, initializers=initializers)) + out1 = self._w1[-1](y1) + + if layer_collection: + layer_collection.register_conv1d((self._w1[-1].w, self._w1[-1].b), + [1, 1, 1], + 'SAME', + y1, + out1, + reuse=False) + + # Implements residual connection + if self._use_residual and x1.shape[-1].value == nc1: + res = x1 + else: + res = 0.0 + + x1 = tf.nn.tanh(out1) + res + + # If we don't need the last layer of rank-2 features, set nc2 to None + if i < len(self._layers) - 1 or self._use_last_layer: + self._w2.append( + snt.Conv2D( + output_channels=nc2, kernel_shape=1, initializers=initializers)) + out2 = self._w2[-1](x2) + + if layer_collection: + layer_collection.register_conv2d((self._w2[-1].w, self._w2[-1].b), + [1, 1, 1, 1], + 'SAME', + x2, + out2, + reuse=False) + + x2 = tf.nn.tanh(out2) + else: + x2 = None + + return x1, x2 + + +class EquivariantNet(snt.AbstractModule): + """A symmetric network like ParallelNet, but without the rank 2 stream.""" + + def __init__(self, shape, layers, residual=False, name='parallel_net'): + super(EquivariantNet, self).__init__(name=name) + self._shape = shape # tuple of input shapes + self._n = sum(shape) + self._layers = layers + self._use_residual = residual + + def _build(self, x1, layer_collection=None): + self._w1 = [] + initializers = { + 'w': tf.initializers.orthogonal(), + 'b': tf.initializers.random_normal() + } + for nc1 in self._layers: + xs1 = tf.split(x1, self._shape, axis=1) + + # mean field features + mf1 = [ + tf.tile( + tf.reduce_mean(x, axis=1, keepdims=True), [1, x1.shape[1], 1]) + for x in xs1 + ] + y1 = tf.concat([x1] + mf1, axis=2) + + self._w1.append( + snt.Conv1D( + output_channels=nc1, kernel_shape=1, initializers=initializers)) + out1 = self._w1[-1](y1) + + if layer_collection: + layer_collection.register_conv1d((self._w1[-1].w, self._w1[-1].b), + [1, 1, 1], + 'SAME', + y1, + out1, + reuse=False) + + # Implements residual connection + if self._use_residual and x1.shape[-1].value == nc1: + res = x1 + else: + res = 0.0 + + x1 = tf.nn.tanh(out1) + res + + return x1 + + +class LogMatmul(snt.AbstractModule): + """Takes log-domain vector and performs numerically stable matmul.""" + + def __init__(self, outputs, use_bias=False, name='log_matmul'): + super(LogMatmul, self).__init__(name=name) + self._outputs = outputs + self._use_bias = use_bias + + def _build(self, log_input, sign_input, layer_collection=None): + rank = log_input.shape.ndims + if rank == 2: + self._layer = snt.Linear(self._outputs, use_bias=self._use_bias) + elif rank == 3: + self._layer = snt.Conv1D( + output_channels=self._outputs, + kernel_shape=1, + use_bias=self._use_bias) + else: + raise RuntimeError('LogMatmul only supports rank 2 and 3 tensors.') + + log_max = tf.reduce_max(log_input, axis=-1, keepdims=True) + inputs = tf.exp(log_input - log_max) * sign_input + output = self._layer(inputs) + sign_output = tf.sign(output) + log_output = 
tf.log(tf.abs(output)) + log_max
+    if layer_collection:
+      if self._use_bias:
+        params = (self._layer.w, self._layer.b)
+      else:
+        params = self._layer.w
+      if rank == 2:
+        layer_collection.register_fully_connected(
+            params, inputs, output, reuse=False)
+      elif rank == 3:
+        layer_collection.register_conv1d(
+            params, [1, 1, 1], 'SAME', inputs, output, reuse=False)
+    return log_output, sign_output
+
+
+class Envelope(snt.AbstractModule):
+  """Compute exponentially-decaying envelope to enforce boundary conditions."""
+
+  def __init__(self, atoms, shape, determinants, soft=False, name='envelope'):
+    r"""Compute exponentially-decaying envelope to enforce boundary conditions.
+
+    Given a set of electron positions r_i and atom positions z_j, this module
+    learns a set of Mahalanobis distance metrics A_jk and mixing proportions
+    \pi_jk and computes a set of exponentially-decaying envelopes of the form:
+
+    f_k(r_i) =
+      \sum_j \pi_jk exp(-sqrt((r_i - z_j)^T * A_jk^T * A_jk * (r_i - z_j)))
+
+    where the index `k` is over all orbitals, both spin up and spin down.
+    Each of these envelopes is multiplied by one of the orbitals in a Fermi Net
+    to enforce the boundary condition that the wavefunction goes to zero at
+    infinity.
+
+    Args:
+      atoms: list of atom positions
+      shape: tuple with number of spin up and spin down electrons
+      determinants: number of determinants, used to compute number of envelopes
+      soft: If true, use sqrt(1 + sum(x**2)) instead of norm(x)
+      name: Sonnet boilerplate
+
+    Returns:
+      list of envelope ops, one for spin up and one for spin down orbitals
+    """
+    super(Envelope, self).__init__(name=name)
+    self._atoms = atoms
+    self._shape = shape
+    self._na = len(atoms)
+    self._nd = determinants
+    self._soft = soft  # remove cusp from envelope
+
+  def _build(self, inputs, layer_collection=None):
+    """Create ops to compute envelope for every electron/orbital pair.
+
+    Args:
+      inputs: tensor of shape (batch size, nelectrons, 3) with position of every
+        electron
+      layer_collection (optional): if using KFAC, the layer collection to
+        register ops with.
+
+    Returns:
+      List of the value of the envelope for spin up and spin down electrons.
+      Each element of the list is a tensorflow op with size
+      (batch, shape[i], norbitals) where shape[i] is the number of electrons of
+      spin "i", and norbitals is shape[i] * the number of determinants.
+    """
+    # shape of each (atom/orbital) envelope
+    self._envelope_sigma = []
+    # mixing proportion for all atoms for a given orbital
+    self._envelope_pi = []
+    self._envelopes = []
+    norms = [[] for _ in self._shape]
+
+    pos = tf.stack([
+        tf.constant(atom.coords, dtype=inputs.dtype.base_dtype)
+        for atom in self._atoms
+    ])
+    pos = tf.expand_dims(tf.expand_dims(pos, 0), 0)
+    diff_ae = tf.expand_dims(inputs, 2) - pos
+
+    # Split difference from electron to atom into list, one per atom, then
+    # further split each element into a list, one for spin up electrons, one
+    # for spin down.
+    diff_aes = [
+        tf.split(x, self._shape, axis=1)
+        for x in tf.split(diff_ae, self._na, axis=2)
+    ]
+
+    # Compute the weighted distances from electron to atom
+    # We swap the normal convention of `i` for the outer for loop and `j` for
+    # the inner for loop to conform to the notation in the docstring.
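+    # Shape walkthrough for the ops above (illustrative only; b = batch size,
+    # n = sum(self._shape), na = self._na):
+    #   inputs:          (b, n, 3)
+    #   pos:             (1, 1, na, 3)
+    #   diff_ae:         (b, n, na, 3)
+    #   diff_aes[j][i]:  (b, self._shape[i], 1, 3)
+    # i.e. diff_aes[j][i] holds the displacement of every spin-i electron
+    # from atom j, ready to be weighted by one A_jk metric per orbital.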
+    for j in range(self._na):  # iterate over atoms
+      self._envelope_sigma.append([])
+      for i in range(len(self._shape)):  # iterate over spin up/down electrons
+        norbital = self._shape[i] * self._nd
+        eyes = tf.transpose(
+            tf.reshape(
+                tf.eye(
+                    3, batch_shape=[norbital], dtype=inputs.dtype.base_dtype),
+                [1, 1, norbital * 3, 3]), (0, 1, 3, 2))
+        self._envelope_sigma[j].append(
+            tf.get_variable(
+                'envelope_sigma_%d_%d' % (j, i),
+                initializer=eyes,
+                use_resource=True,
+                trainable=True,
+                dtype=inputs.dtype.base_dtype,
+            ))
+
+        # multiply the difference from the electron to the atom by an
+        # anisotropic weight - one 3x3 matrix per orbital function
+
+        # From the notation in the docstring, `rout` is
+        # equivalent to (A_j1; ...; A_jk) * (r_i - z_j)
+
+        # Note, snt.Conv2D won't take bare TF ops as initializers, so we just
+        # use tf.nn.conv2d here instead.
+        rout = tf.nn.conv2d(diff_aes[j][i], self._envelope_sigma[j][i],
+                            [1, 1, 1, 1], 'SAME')
+        if layer_collection:
+          layer_collection.register_conv2d(
+              self._envelope_sigma[j][i], [1, 1, 1, 1],
+              'SAME',
+              diff_aes[j][i],
+              rout,
+              reuse=False)
+
+        # Reshape `rout` so that rout[:, i, k, :] represents
+        # A_jk * (r_i - z_j) for all r_i in the batch
+        rout = tf.reshape(rout, [rout.shape[0], rout.shape[1], norbital, 3])
+        if self._soft:
+          norms[i].append(
+              tf.sqrt(tf.reduce_sum(1 + rout**2, axis=3, keepdims=True)))
+        else:
+          norms[i].append(tf.norm(rout, axis=3, keepdims=True))
+
+    # Compute the mixing proportions for different atoms
+    for i in range(len(self._shape)):
+      norbital = self._shape[i] * self._nd
+      self._envelope_pi.append(
+          tf.get_variable(
+              'envelope_pi_%d' % i,
+              initializer=tf.ones([norbital, self._na],
+                                  dtype=inputs.dtype.base_dtype),
+              use_resource=True,
+              trainable=True,
+              dtype=inputs.dtype.base_dtype,
+          ))
+      if layer_collection:
+        layer_collection.register_generic(
+            self._envelope_pi[i], inputs.shape[0].value, reuse=False)
+
+      # Take the exponential of the Mahalanobis distance and multiply each
+      # term by the weight for that atom.
+      norm = tf.concat(norms[i], axis=3)
+      mixed_norms = tf.exp(-norm) * self._envelope_pi[i]
+
+      # Add the weighted terms for each atom together to form the envelope.
+      # Each term in this list has shape (batch size, shape[i], norbitals)
+      # where shape[i] is the number of electrons of spin `i`. This computes
+      # the value of the envelope for every combination of electrons and
+      # orbitals.
+      self._envelopes.append(tf.reduce_sum(mixed_norms, axis=3))
+
+    return self._envelopes
+
+
+class BackFlow(snt.AbstractModule):
+  """Introduce correlation in Slater network via coordinate transformations.
+
+  Electron-electron, electron-nuclear and electron-electron-nuclear
+  contributions are computed using MLPs for the eta, mu, theta and phi
+  functions.
+ """ + + def _build(self, xs, xsa, r_en, r_ee, layer_collection=None): + # Notation is explained in the CASINO manual, section 23.1 + # (https://casinoqmc.net/casino_manual_dir/casino_manual.pdf) + backflow_ee = TensorMlp([64, 64, 64, 1], + rank=2, + activate_final=False, + residual=True, + gain=1.e-3, + stddev=1.e-3) + + backflow_en = TensorMlp([64, 64, 64, 1], + rank=2, + activate_final=False, + residual=True, + gain=1.e-3, + stddev=1.e-3) + + backflow_een = TensorMlp([64, 64, 64, 2], + rank=2, + activate_final=False, + residual=True, + gain=1.e-3, + stddev=1.e-3) + + # electron-electron backflow term + eta = backflow_ee(r_ee, layer_collection=layer_collection) + + # electron-nuclear backflow term + r_en = tf.expand_dims(r_en, -1) + mu = backflow_en(r_en, layer_collection=layer_collection) + + # The most complicated term: the electron-electron-nuclear backflow term + shape = [r_ee.shape[0], r_ee.shape[1], r_ee.shape[2], r_en.shape[2], 1] + r_een = tf.concat((tf.broadcast_to(tf.expand_dims( + r_ee, -2), shape), tf.broadcast_to(tf.expand_dims(r_en, -3), shape), + tf.broadcast_to(tf.expand_dims(r_en, -4), shape)), + axis=-1) + # Reshape so we can squeeze this into a rank-2 network. + r_een_flat = tf.reshape(r_een, [ + r_een.shape[0], r_een.shape[1], r_een.shape[2] * r_een.shape[3], + r_een.shape[4] + ]) + phi_theta = backflow_een(r_een_flat, layer_collection=layer_collection) + phi = tf.reshape(phi_theta[..., 0], r_een.shape[:-1] + [1]) + theta = tf.reshape(phi_theta[..., 1], r_een.shape[:-1] + [1]) + + diffs = tf.expand_dims(xs, 1) - tf.expand_dims(xs, 2) + backflow = ( + tf.reduce_sum(eta * diffs, axis=2) + tf.reduce_sum(mu * xsa, axis=2) + + tf.reduce_sum(phi * tf.expand_dims(diffs, axis=3), axis=[2, 3]) + + tf.reduce_sum(theta * tf.expand_dims(xsa, axis=2), axis=[2, 3])) + return backflow + + +class FermiNet(snt.AbstractModule): + """Neural network with a determinant layer to antisymmetrize output.""" + + def __init__(self, + *, + atoms, + nelectrons, + slater_dets, + hidden_units, + after_det, + dim=3, + architecture='ferminet', + envelope=False, + residual=False, + r12_ee_features=True, + r12_en_features=True, + pos_ee_features=True, + build_backflow=False, + use_backflow=False, + jastrow_en=False, + jastrow_ee=False, + jastrow_een=False, + logdet=False, + pretrain_iterations=2, + name='det_net'): + super(FermiNet, self).__init__(name=name) + self._atoms = atoms + self._dim = dim + # tuple of number of (spin up, spin down) e's or a tuple of (e's) for + # spin-polarised systems. 
+ self._shape = [nelec for nelec in nelectrons if nelec > 0] + self._ne = sum(nelectrons) # total number of electrons + self._na = len(atoms) # number of atoms + self._nd = slater_dets # number of determinants + + self._architecture = architecture + self._hidden_units = hidden_units + self._after_det = after_det + self._add_envelope = envelope + self._r12_ee = r12_ee_features + self._r12_en = r12_en_features + self._pos_ee = pos_ee_features + self._use_residual = residual + self._build_backflow = build_backflow + self._use_backflow = use_backflow + self._use_jastrow_en = jastrow_en + self._use_jastrow_ee = jastrow_ee + self._use_jastrow_een = jastrow_een + self._output_log = logdet # Whether to represent wavef'n or log-wavef'n + self.pretrain_iterations = pretrain_iterations + + def _features(self, xs, r_ee, r_en, xsa): + x1 = [] + x2 = [] + if self._r12_en: + x1.append(r_en) + if self._r12_ee: + x2.append(r_ee) + xsa = tf.reshape(xsa, [xsa.shape[0], xsa.shape[1], -1]) + x1.append(xsa) + if self._pos_ee: + diffs = tf.expand_dims(xs, 1) - tf.expand_dims(xs, 2) + x2.append(diffs) + x1 = tf.concat(x1, axis=2) if x1 else None + x2 = tf.concat(x2, axis=3) if x2 else None + return x1, x2 + + def _build(self, inputs, layer_collection=None, back_prop=True): + self.inputs = inputs + xs = tf.reshape(inputs, [inputs.shape[0], self._ne, -1]) + r_en, r_ee, xsa = hamiltonian.r12_features( + inputs, + self._atoms, + self._ne, + keep_pos=True, + flatten=False, + atomic_coords=True) + + if self._use_backflow or self._build_backflow: + if self._architecture != 'slater': + raise ValueError('Backflow should only be used with Hartree-Fock/' + 'Slater-Jastrow network') + backflow_net = BackFlow() + backflow = backflow_net(xs, xsa, r_en, r_ee, layer_collection) + else: + backflow = None + + if backflow is not None: + # Warning: Backflow transformed coordinates should *only* be fed into the + # orbitals and not into the Jastrow factor. Denote functions of the + # backflow as q* accordingly. + q_inputs = inputs + tf.reshape(backflow, inputs.shape) + qs = tf.reshape(q_inputs, [q_inputs.shape[0], self._ne, -1]) + q_en, q_ee, qsa = hamiltonian.r12_features( + q_inputs, + self._atoms, + self._ne, + keep_pos=True, + flatten=False, + atomic_coords=True) + x1, x2 = self._features(qs, q_ee, q_en, qsa) + else: + x1, x2 = self._features(xs, r_ee, r_en, xsa) + + # Fermi Net or Slater-Jastrow net? + if self._architecture == 'ferminet': + if isinstance(self._hidden_units[0], typing.Iterable): + self._parallel_net = ParallelNet( + self._shape, + self._hidden_units, + residual=self._use_residual, + use_last_layer=False) + self._before_det, self._before_jastrow = self._parallel_net( + x1, x2, r_ee, layer_collection=layer_collection) + else: + self._parallel_net = EquivariantNet( + self._shape, self._hidden_units, residual=self._use_residual) + self._before_det = self._parallel_net( + x1, layer_collection=layer_collection) + self._before_jastrow = None + elif self._architecture == 'slater': + self._before_det_mlp = TensorMlp( + self._hidden_units, rank=1, residual=True) + self._before_det = self._before_det_mlp( + x1, layer_collection=layer_collection) + else: + raise ValueError('Not a recognized architecture: {}'.format( + self._architecture)) + + # Split the features into spin up and down electrons, and shape into + # orbitals and determinants. 
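+    # Illustrative shapes for the block below: each spin stream of shape
+    # (batch, shape[i], nfeatures) is mapped by a kernel-size-1 Conv1D to
+    # (batch, shape[i], shape[i] * ndeterminants), then reshaped and
+    # transposed to (batch, ndeterminants, shape[i], shape[i]) -- one square
+    # orbital matrix per determinant per walker.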
+ before_det = tf.split(self._before_det, self._shape, axis=1) + self._orbital_layers = [ + snt.Conv1D(x.shape[1] * self._nd, 1, name='orbital_%d' % i) + for i, x in enumerate(before_det) + ] + orbitals = [] + for i, x in enumerate(before_det): + layer = self._orbital_layers[i] + y = layer(x) + orbitals.append(y) + if layer_collection: + layer_collection.register_conv1d((layer.w, layer.b), [1, 1, 1], + 'SAME', + x, + y, + reuse=False) + + # Create an exponentially-decaying envelope around every atom + if self._add_envelope: + envelope = Envelope(self._atoms, self._shape, self._nd) + self._envelopes = envelope(xs, layer_collection=layer_collection) + for i in range(len(orbitals)): + orbitals[i] = orbitals[i] * self._envelopes[i] + + self._orbitals = [ + tf.transpose( + tf.reshape(x, [x.shape[0], x.shape[1], -1, x.shape[1]]), + [0, 2, 1, 3]) for x in orbitals + ] + + slog1d = lambda x: (tf.sign(x[..., 0, 0]), tf.log(tf.abs(x[..., 0, 0]))) + slog = lambda x: log_determinant(x) if x.shape[-1].value > 1 else slog1d(x) + if self._output_log: + if self._nd == 1: + self._slogdets = [ + slog(x) for x in self._orbitals + ] + self._logdet = tf.add_n([x[1] for x in self._slogdets]) + self._signs = functools.reduce( + tf.Tensor.__mul__, # pytype: disable=unsupported-operands + [x[0] for x in self._slogdets]) + output = (self._logdet, self._signs) + else: + self._after_det_weights = tf.get_variable( + 'after_det_weights', + shape=[self._nd, self._after_det[0]], + initializer=tf.initializers.ones(dtype=inputs.dtype.base_dtype), + use_resource=True, + dtype=inputs.dtype.base_dtype, + ) + if back_prop: + if len(self._orbitals) == 1: + output = list(slog(x) for x in self._orbitals) + signdet, logdet = output[0] + log_max = tf.reduce_max(logdet, axis=-1, keepdims=True) + inputs = tf.exp(logdet - log_max) * signdet + output = tf.matmul(inputs, self._after_det_weights) + output = tf.log(tf.abs(output)) + log_max, tf.sign(output) + else: + output = logdet_matmul(self._orbitals[0], self._orbitals[1], + self._after_det_weights) + else: + # Compute logdet with tf.linalg.slogdet, as gradients are not needed + # We keep this outside logdet_matmul, despite the code duplication, + # because tf.custom_gradient decorator does not like keyword args + if len(self._orbitals) == 1: + signdet, logdet = tf.linalg.slogdet(self._orbitals[0]) + else: + slogdet0 = tf.linalg.slogdet(self._orbitals[0]) + slogdet1 = tf.linalg.slogdet(self._orbitals[1]) + signdet = slogdet0[0] * slogdet1[0] + logdet = slogdet0[1] + slogdet1[1] + + log_max = tf.reduce_max(logdet, axis=-1, keepdims=True) + inputs = tf.exp(logdet - log_max) * signdet + output = tf.matmul(inputs, self._after_det_weights) + output = tf.log(tf.abs(output)) + log_max, tf.sign(output) + + self._logdet = output[0] + if layer_collection: + layer_collection.register_generic( + self._after_det_weights, + self._orbitals[0].shape[0].value, + reuse=False) + + if len(self._after_det) > 1: + log_out, sign_out = output + # Constructing x and -x to feed through network to make odd function + output = (tf.stack((log_out, log_out), + axis=1), tf.stack((sign_out, -1 * sign_out), + axis=1)) + for nout in self._after_det[1:]: + layer = LogMatmul(nout, use_bias=True) + # apply ReLU in log domain + output = (tf.where( + tf.greater(output[1], 0.0), output[0], + -100 * tf.ones_like(output[0])), output[1]) + output = layer(*output, layer_collection=layer_collection) + + # Make output an odd function + log_out, sign_out = output + logs_out = [ + tf.squeeze(out, axis=1) for out in 
tf.split(log_out, 2, axis=1) + ] + signs_out = [ + tf.squeeze(out, axis=1) for out in tf.split(sign_out, 2, axis=1) + ] + log_max = tf.maximum( + tf.reduce_max(logs_out[0], axis=1, keepdims=True), + tf.reduce_max(logs_out[1], axis=1, keepdims=True)) + real_out = ( + tf.exp(logs_out[0] - log_max) * signs_out[0] - + tf.exp(logs_out[1] - log_max) * signs_out[1]) + output = (tf.log(tf.abs(real_out)) + log_max, tf.sign(real_out)) + else: + self._dets = [ + determinant(x) if x.shape[-1].value > 1 else x[..., 0, 0] + for x in self._orbitals + ] + self._det = functools.reduce(tf.Tensor.__mul__, self._dets) # pytype: disable=unsupported-operands + after_det_input = self._det + + self._after_det_mlp = TensorMlp(self._after_det, use_bias=True) + + combined_input = tf.concat([after_det_input, -after_det_input], axis=0) + combined_output = self._after_det_mlp( + combined_input, layer_collection=layer_collection) + output_0, output_1 = tf.split(combined_output, 2, axis=0) + output = output_0 - output_1 + + total_jastrow = 0.0 + if self._use_jastrow_ee: + self._jastrow_ee_net = TensorMlp( + [64, 64, 64, len(self._shape)**2], + rank=2, + activate_final=False, + residual=True) + # To prevent NaNs, rescale factor by number of electrons squared. + self._jastrow_ee = self._jastrow_ee_net( + r_ee, layer_collection=layer_collection) / ( + self._ne**2) + + # split output into channels for different spin interactions + jastrow_ee = flatten([ + tf.split(x, self._shape, axis=2) + for x in tf.split(self._jastrow_ee, self._shape, axis=1) + ]) + # For each spin interaction (alpha/alpha, alpha/beta, beta/alpha, + # beta/beta), get just the Jastrow term for that interaction + jastrow_ee = [x[..., i:i + 1] for i, x in enumerate(jastrow_ee)] + # Add up all Jastrow terms for all pairs of electrons over all spins + jastrow_ee = tf.add_n([tf.reduce_sum(x, axis=[1, 2]) for x in jastrow_ee]) + total_jastrow += jastrow_ee + + if self._use_jastrow_en: + self._jastrow_en_net = TensorMlp([64, 64, 64, self._na], + rank=2, + activate_final=False, + residual=True) + self._jastrow_en = ( + self._jastrow_en_net( + r_en[..., None], layer_collection=layer_collection) / self._ne / + self._na) + # We have one output per atom/electron distance. Rather than having one + # network for every atom, we have one network with many outputs, and take + # the diagonal of the outputs. + jastrow_en = tf.reduce_sum(tf.matrix_diag_part(self._jastrow_en), axis=2) + jastrow_en = tf.reduce_sum(jastrow_en, axis=1, keepdims=True) + total_jastrow += jastrow_en + + if self._use_jastrow_een: + self._jastrow_een_nets = [] + self._jastrow_een = [] + for i in range(self._na): + # Create a separate network for every atom. Not efficient, but works + # well enough for the experiments we need this for. This is only for + # comparison against benchmarks in the literature and is not meant to + # be a default component of the Fermionic Neural Network. 
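+        # Illustrative shapes for the concat below: r_ee is
+        # (batch, ne, ne, 1) and each tiled r_en slice is (batch, ne, ne, 1),
+        # so `batch` is (batch, ne, ne, 3), i.e. the electron-electron
+        # distance plus both electron-nucleus distances for atom i.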
+ self._jastrow_een_nets.append( + TensorMlp([64, 64, 64, len(self._shape)**2], + rank=2, + activate_final=False, + residual=True)) + + batch = tf.concat((r_ee, + tf.tile( + tf.expand_dims(r_en[..., i:i + 1], axis=1), + [1, self._ne, 1, 1]), + tf.tile( + tf.expand_dims(r_en[..., i:i + 1], axis=2), + [1, 1, self._ne, 1])), + axis=-1) + self._jastrow_een.append(self._jastrow_een_nets[i]( + batch, layer_collection=layer_collection) / (self._ne**2)) + + # split output into channels for different spin interactions + jastrow_een = flatten([ + tf.split(x, self._shape, axis=2) + for x in tf.split(self._jastrow_een[i], self._shape, axis=1) + ]) + # For each spin interaction (alpha/alpha, alpha/beta, beta/alpha, + # beta/beta), get just the Jastrow term for that interaction + jastrow_een = [x[..., i:i + 1] for i, x in enumerate(jastrow_een)] + # Add up all Jastrow terms for all pairs of electrons over all spins + jastrow_een = tf.add_n( + [tf.reduce_sum(x, axis=[1, 2]) for x in jastrow_een]) + total_jastrow += jastrow_een + + if self._output_log: + output = (output[0] - total_jastrow, output[1]) + else: + output = output * tf.exp(-total_jastrow) + + self.output = output + return self.output + + +def pretrain_hartree_fock(network, data, strategy, hf_approx): + """Pretrain network so orbitals are nearly uncorrelated. + + Args: + network: network to be pretrained. Note: only the network._orbitals tensor + is pretrained. + data: (nreplicas, batch, spatial_dim * nelectrons) Tensor. Input to network. + strategy: DistributionStrategy instance. + hf_approx: Instance of Scf class containing Hartree-Fock approximation. + + Returns: + hf_loss: Distance between orbitals and Hartree-Fock orbitals. Minimised each + iteration by tf.AdamOptimizer. + """ + + with strategy.scope(): + optim = tf.train.AdamOptimizer(name='Adam_pretrain') + + def pretrain_step_fn(walkers): + """Step function for pretraining.""" + replica_id = tf.distribute.get_replica_context().replica_id_in_sync_group + data = walkers[replica_id] + network(data) + xs = tf.reshape(data, [int(data.shape[0]), sum(hf_approx.nelectrons), -1]) + hf_mats = hf_approx.tf_eval_hf(xs, deriv=True) + + # pylint: disable=protected-access + diffs = [ + tf.expand_dims(hf, 1) - nn + for (hf, nn) in zip(hf_mats, network._orbitals) + ] + # pylint: enable=protected-access + mses = [tf.reduce_mean(tf.square(diff)) for diff in diffs] + if len(mses) > 1: + mse_loss = 0.5 * tf.add(*mses) + else: + mse_loss = 0.5 * mses[0] + + pretrain_op = optim.minimize(mse_loss) + with tf.control_dependencies([pretrain_op]): + mse_loss = tf.identity(mse_loss) + return mse_loss + + mse_loss = strategy.experimental_run( + functools.partial(pretrain_step_fn, walkers=data)) + return strategy.reduce(tf.distribute.ReduceOp.MEAN, mse_loss) diff --git a/ferminet/qmc.py b/ferminet/qmc.py new file mode 100644 index 0000000..55deac5 --- /dev/null +++ b/ferminet/qmc.py @@ -0,0 +1,493 @@ +# Lint as: python3 +# Copyright 2018 DeepMind Technologies Limited. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Top-level QMC training."""
+
+import contextlib
+import functools
+import os
+import shutil
+from typing import Iterable, Mapping, Optional, Sequence, Text
+
+from absl import flags
+from absl import logging
+from ferminet import networks
+import kfac
+import numpy as np
+import tables
+import tensorflow.compat.v1 as tf
+import tensorflow_probability as tfp
+from tensorflow.core.protobuf import rewriter_config_pb2
+
+FLAGS = flags.FLAGS
+
+
+class Writer(contextlib.AbstractContextManager):
+  """Write data to CSV, as well as logging data to stdout if desired."""
+
+  def __init__(self,
+               name: str,
+               schema: Sequence[str],
+               directory: str = 'logs/',
+               iteration_key: Optional[str] = 't',
+               log: bool = True):
+    """Initialise Writer.
+
+    Args:
+      name: file name for CSV.
+      schema: sequence of keys, corresponding to each data item.
+      directory: directory path to write file to.
+      iteration_key: if not None or a null string, also include the iteration
+        index as the first column in the CSV output with the given key.
+      log: Also log each entry to stdout.
+    """
+    self._schema = schema
+    if not os.path.isdir(directory):
+      os.mkdir(directory)
+    self._filename = os.path.join(directory, name + '.csv')
+    self._iteration_key = iteration_key
+    self._log = log
+
+  def __enter__(self):
+    self._file = open(self._filename, 'w')
+    # write top row of csv
+    if self._iteration_key:
+      self._file.write(f'{self._iteration_key},')
+    self._file.write(','.join(self._schema) + '\n')
+    return self
+
+  def write(self, t: int, **data):
+    """Writes to file and stdout.
+
+    Args:
+      t: iteration index.
+      **data: data items with keys as given in schema.
+    """
+    row = [str(data.get(key, '')) for key in self._schema]
+    if self._iteration_key:
+      row.insert(0, str(t))
+    for key in data:
+      if key not in self._schema:
+        raise ValueError('Not a recognized key for writer: %s' % key)
+
+    # write the data to csv
+    self._file.write(','.join(row) + '\n')
+
+    # write the data to abseil logs
+    if self._log:
+      logging.info('Iteration %s: %s', t, data)
+
+  def __exit__(self, exc_type, exc_val, exc_tb):
+    self._file.close()
+
+
+class H5Writer(contextlib.AbstractContextManager):
+  """Write data to HDF5 files."""
+
+  def __init__(self,
+               name: str,
+               schema: Mapping[str, Sequence[int]],
+               directory: str = '',
+               index_key: str = 't',
+               compression_level: int = 5):
+    """Initialise H5Writer.
+
+    Args:
+      name: file name for the HDF5 file.
+      schema: dict of keys, corresponding to each data item. All data is
+        assumed to be 32-bit floats.
+      directory: directory path to write file to.
+      index_key: name of (integer) key used to index each entry.
+      compression_level: compression level (0-9) used to compress HDF5 file.
+ """ + self._path = os.path.join(directory, name) + self._schema = schema + self._index_key = index_key + self._description = {} + self._file = None + self._complevel = compression_level + + def __enter__(self): + if not self._schema: + return self + pos = 1 + self._description[self._index_key] = tables.Int32Col(pos=pos) + for key, shape in self._schema.items(): + pos += 1 + self._description[key] = tables.Float32Col(pos=pos, shape=shape) + if not os.path.isdir(os.path.dirname(self._path)): + os.mkdir(os.path.dirname(self._path)) + self._file = tables.open_file( + self._path, + mode='w', + title='Fermi Net Data', + filters=tables.Filters(complevel=self._complevel)) + self._table = self._file.create_table( + where=self._file.root, name='data', description=self._description) + return self + + def write(self, index: int, data): + """Write data to HDF5 file. + + Args: + index: iteration index. + data: dict of arrays to write to file. Only elements with keys in the + schema are written. + """ + if self._file: + h5_data = (index,) + for key in self._description: + if key != self._index_key: + h5_data += (data[key],) + self._table.append([h5_data]) + self._table.flush() + + def __exit__(self, exc_type, exc_val, exc_tb): + if self._file: + self._file.close() + self._file = None + + +class QMC: + """Quantum Monte Carlo on a Fermi Net.""" + + def __init__( + self, + hamiltonian, + network, + data_gen, + hf_data_gen, + clip_el: Optional[float] = None, + check_loss: bool = False, + ): + """Constructs a QMC object for training a Fermi Net. + + Args: + hamiltonian: Hamiltonian operators. See hamiltonian.py + network: network to represent the wavefunction, psi. + data_gen: mcmc.MCMC object that samples configurations from + psi^2, where psi is the output of the network. + hf_data_gen: mcmc.MCMC object that samples configurations from + phi^2, where phi is a Hartree-Fock state of the Hamiltonian. + clip_el: If not None, sets the scale at which to clip the local energy. + Note the local energy is only clipped in the grad_loss passed back to + the gradient, not in the local_energy op itself. The width of the window + in which the local energy is not clipped is a multiple of the total + variation from the median, which is the equivalent of standard deviation + for the l1 norm instead of l2 norm. This makes the clipping robuts to + outliers, which is the entire point. + check_loss: If True, check the loss for NaNs and raise an error if found. + """ + self.hamiltonian = hamiltonian + self.data_gen = data_gen + self.hf_data_gen = hf_data_gen + self._clip_el = clip_el + self._check_loss = check_loss + self.network = network + + def _qmc_step_fn(self, optimizer_fn, using_kfac, global_step): + """Training step for network given the MCMC state. + + Args: + optimizer_fn: A function which takes as argument a LayerCollection object + (None) if using_kfac is True (False) and returns the optimizer. + using_kfac: True if optimizer_fn creates a instance of kfac.KfacOptimizer + and False otherwise. + global_step: tensorflow op for global step index. + + Returns: + loss: per-GPU loss tensor with control dependencies for updating network. + local_energy: local energy for each walker + features: network output for each walker. + + Raises: + RuntimeError: If using_kfac is True and optimizer_fn does not create a + kfac.KfacOptimizer instance or the converse. + """ + + # Note layer_collection cannot be modified after the KFac optimizer has been + # constructed. 
+    if using_kfac:
+      layer_collection = kfac.LayerCollection()
+    else:
+      layer_collection = None
+
+    walkers = self.data_gen.walkers_per_gpu
+    features, features_sign = self.network(walkers, layer_collection)
+    optimizer = optimizer_fn(layer_collection)
+
+    if bool(using_kfac) != isinstance(optimizer, kfac.KfacOptimizer):
+      raise RuntimeError(
+          'using_kfac is inconsistent with the optimizer created by '
+          'optimizer_fn.')
+
+    if layer_collection:
+      layer_collection.register_squared_error_loss(features, reuse=False)
+
+    with tf.name_scope('local_energy'):
+      kinetic_fn, potential_fn = self.hamiltonian
+      kinetic = kinetic_fn(features, walkers)
+      potential = potential_fn(walkers)
+      local_energy = kinetic + potential
+      loss = tf.reduce_mean(local_energy)
+      replica_context = tf.distribute.get_replica_context()
+      mean_op = tf.distribute.ReduceOp.MEAN
+      mean_loss = replica_context.merge_call(
+          lambda strategy, val: strategy.reduce(mean_op, val), args=(loss,))
+      grad_loss = local_energy - mean_loss
+
+      if self._clip_el is not None:
+        # clip_el should be much larger than 1, to avoid bias
+        median = tfp.stats.percentile(grad_loss, 50.0)
+        diff = tf.reduce_mean(tf.abs(grad_loss - median))
+        grad_loss_clipped = tf.clip_by_value(grad_loss,
+                                             median - self._clip_el * diff,
+                                             median + self._clip_el * diff)
+      else:
+        grad_loss_clipped = grad_loss
+
+    with tf.name_scope('step'):
+      # Create functions which take no arguments and return the ops for applying
+      # an optimisation step.
+      if not optimizer:
+        optimize_step = tf.no_op
+      else:
+        optimize_step = functools.partial(
+            optimizer.minimize,
+            features,
+            global_step=global_step,
+            var_list=self.network.trainable_variables,
+            grad_loss=grad_loss_clipped)
+
+      if self._check_loss:
+        # Apply optimisation step only if all local energies are well-defined.
+        step = tf.cond(
+            tf.reduce_any(tf.math.is_nan(mean_loss)), tf.no_op, optimize_step)
+      else:
+        # Faster, but less safe: always apply optimisation step. If the
+        # gradients are not well-defined (e.g. loss contains a NaN), then the
+        # network will also be set to NaN.
+        step = optimize_step()
+
+      # A strategy step function must return tensors, not ops, so apply a
+      # control dependency to a dummy op to ensure they're executed.
+      with tf.control_dependencies([step]):
+        loss = tf.identity(loss)
+
+    return {
+        'loss': loss,
+        'local_energies': local_energy,
+        'features': features,
+        'features_sign': features_sign
+    }
+
+  def train(self,
+            optim_fn,
+            iterations: int,
+            logging_config,
+            scf_approx,
+            strategy,
+            using_kfac: bool,
+            prefix: Text = 'fermi_net',
+            global_step=None,
+            profile_iterations: Optional[Iterable[int]] = None,
+            determinism_mode: bool = False,
+            cached_data_op=None,
+            write_graph: Optional[Text] = None,
+            burn_in: int = 0,
+            mcmc_steps: int = 10):
+    """Training loop for VMC with hooks for logging and plotting.
+
+    Args:
+      optim_fn: function with zero arguments (thunk) that returns a class
+        implementing optimizer (specifically apply_gradients method).
+      iterations: number of iterations to perform for training.
+      logging_config: LoggingConfig object defining what and how to log data.
+      scf_approx: For hf pretraining, object of class Scf, otherwise None.
+      strategy: DistributionStrategy instance to use for parallelisation. Must
+        match the strategy used to build the model.
+      using_kfac: True if optim_fn creates a kfac.KfacOptimizer object, False
+        otherwise.
+      prefix: prefix to use for dataframes in bigtable. Appended with _sparse
+        and _detailed for configuration and training dataframes respectively.
+      global_step: tensorflow op for global step number. If None set to 0 and
+        incremented each iteration. Can be used to provide a learning rate
+        schedule.
+      profile_iterations: iterable of iteration numbers at which the training
+        session is to be profiled. Note: data generation step (e.g. MCMC) is not
+        included in the profile and must be added separately if required.
+      determinism_mode: activates deterministic computation when in CPU mode.
+      cached_data_op: Op which updates a variable to cache the current training
+        batch. Only necessary if using KFAC with adaptive damping.
+      write_graph: if given, write graph to given file. Must be a full path.
+      burn_in: Number of burn in steps after pretraining. If zero do not burn in
+        or reinitialize walkers.
+      mcmc_steps: Number of MCMC steps to perform between network updates.
+
+    Raises:
+      RuntimeError: if a NaN value for the loss or another accumulated statistic
+        is detected.
+    """
+    if global_step is None:
+      with strategy.scope():
+        global_step = tf.train.get_or_create_global_step()
+    if profile_iterations is None:
+      profile_iterations = []
+
+    if hasattr(self.network, 'pretrain_iterations'):
+      stats_len = max(iterations, self.network.pretrain_iterations)
+    else:
+      stats_len = iterations
+
+    with strategy.scope():
+      mcmc_step = self.data_gen.step()
+      mcmc_hf_step = self.hf_data_gen.step()
+      mcmc_reset_walkers = self.data_gen.reinitialize_walkers()
+      mcmc_update_psi = self.data_gen.update_psi()
+
+      # Pretraining
+      concat_data = tf.concat([self.hf_data_gen.walkers, self.data_gen.walkers],
+                              axis=0)
+      pretrain_ops = networks.pretrain_hartree_fock(self.network, concat_data,
+                                                    strategy, scf_approx)
+
+    qmc_step_fn = functools.partial(self._qmc_step_fn, optim_fn, using_kfac,
+                                    global_step)
+    with strategy.scope():
+      qmc_out_per_replica = strategy.experimental_run(qmc_step_fn)
+      qmc_out_per_replica = {
+          key: tf.stack(strategy.experimental_local_results(value))
+          for key, value in qmc_out_per_replica.items()
+      }
+    loss = tf.reduce_mean(qmc_out_per_replica['loss'])
+
+    stats_ops = self.data_gen.stats_ops(log_walkers=logging_config.walkers)
+    stats_ops['loss_per_replica'] = qmc_out_per_replica['loss']
+    stats_loss_ = np.zeros(shape=qmc_out_per_replica['loss'].shape.as_list())
+
+    if logging_config.wavefunction:
+      stats_ops['log_wavefunction'] = qmc_out_per_replica['features']
+      stats_ops['wavefunction_sign'] = qmc_out_per_replica['features_sign']
+    if logging_config.local_energy:
+      stats_ops['local_energy'] = qmc_out_per_replica['local_energies']
+
+    train_schema = ['energy', 'pmove']
+    if logging_config.replicas > 1:
+      train_schema += [
+          'energy_replica:{}'.format(i) for i in range(logging_config.replicas)
+      ]
+
+    # As these data are much larger, we create a separate H5 file to avoid
+    # polluting the CSV.
+    h5_data = ('walkers', 'log_wavefunction', 'wavefunction_sign',
+               'local_energy')
+    h5_schema = {
+        key: stats_ops[key].shape.as_list()
+        for key in h5_data
+        if key in stats_ops
+    }
+
+    if write_graph:
+      tf.train.write_graph(tf.get_default_graph(), os.path.dirname(write_graph),
+                           os.path.basename(write_graph))
+
+    if determinism_mode:
+      config_proto = tf.ConfigProto(
+          inter_op_parallelism_threads=1,
+          device_count={'GPU': 0},
+          allow_soft_placement=True)
+    else:
+      config_proto = tf.ConfigProto(allow_soft_placement=True)
+    # See https://github.com/tensorflow/tensorflow/issues/23780.
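+    # (Our reading of the linked issue: grappler's memory-optimization pass
+    # can rewrite graphs that use tf.custom_gradient incorrectly, so the pass
+    # is disabled outright below.)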
+    config_proto.graph_options.rewrite_options.memory_optimization = (
+        rewriter_config_pb2.RewriterConfig.OFF)
+
+    saver = tf.train.Saver(max_to_keep=10, save_relative_paths=True)
+    scaffold = tf.train.Scaffold(saver=saver)
+    checkpoint_path = os.path.join(logging_config.result_path, 'checkpoints')
+    if logging_config.restore_path:
+      logging.info('Copying %s to %s and attempting to restore',
+                   logging_config.restore_path, checkpoint_path)
+      shutil.copytree(logging_config.restore_path, checkpoint_path)
+
+    with tf.train.MonitoredTrainingSession(
+        scaffold=scaffold,
+        checkpoint_dir=checkpoint_path,
+        save_checkpoint_secs=logging_config.save_frequency * 60,
+        save_checkpoint_steps=None,
+        save_summaries_steps=None,
+        log_step_count_steps=None,
+        config=config_proto,
+    ) as sess:
+      logging.info('Initialized variables')
+
+      if self.network.pretrain_iterations > 0:
+        logging.info('Pretraining network parameters')
+        with Writer(
+            name='pretrain_stats',
+            schema=['loss'],
+            directory=logging_config.result_path) as pretrain_writer:
+          for t in range(self.network.pretrain_iterations):
+            for _ in range(mcmc_steps):
+              sess.run(mcmc_step)
+            for _ in range(mcmc_steps):
+              sess.run(mcmc_hf_step)
+
+            pretrain_loss = sess.run(pretrain_ops)
+            pretrain_writer.write(t, loss=pretrain_loss)
+        logging.info('Pretrained network parameters.')
+
+      if burn_in > 0:
+        logging.info('Resetting MCMC state')
+        sess.run(mcmc_reset_walkers)
+        for _ in range(burn_in):
+          for _ in range(mcmc_steps):
+            sess.run(mcmc_step)
+        logging.info('MCMC burn in complete')
+
+      with Writer(name='train_stats', schema=train_schema, directory=logging_config.result_path) as training_writer, \
+          H5Writer(name='data.h5', schema=h5_schema, directory=logging_config.result_path) as h5_writer:
+        for t in range(iterations):
+          if cached_data_op is not None:
+            sess.run(cached_data_op)
+          sess.run(mcmc_update_psi)
+          for _ in range(mcmc_steps):
+            sess.run(mcmc_step)
+          loss_, stats_ops_ = sess.run([loss, stats_ops])
+          stats_values = [loss_] + list(stats_ops_.values())
+          # Update the contents of (but don't reassign) stats_loss_, so that
+          # the conditional checkpoint check sees the current per-replica loss
+          # via its earlier local-scope capture of stats_loss_.
+          stats_loss_[:] = stats_ops_['loss_per_replica']
+          if any(np.isnan(stat).any() for stat in stats_values):
+            logging.info('NaN value detected. Loss: %s. Stats: %s.', loss_,
+                         stats_ops_)
+            if self._check_loss:
+              # Re-run the mcmc_reset (setting psi) as the checkpoint hook is
+              # evaluated at the end of a session run call.
+              sess.run(mcmc_update_psi)
+              raise RuntimeError(
+                  'NaN value detected. Loss: {}. Stats: {}.'.format(
+                      loss_, stats_ops_))
+
+          if t % logging_config.stats_frequency == 0:
+            writer_kwargs = {
+                'energy': loss_,
+                'pmove': stats_ops_['pmove'],
+            }
+            if logging_config.replicas > 1:
+              for i in range(logging_config.replicas):
+                lpr = stats_ops_['loss_per_replica'][i]
+                writer_kwargs['energy_replica:%d' % i] = lpr
+            training_writer.write(t, **writer_kwargs)
+          h5_writer.write(t, stats_ops_)
diff --git a/ferminet/tests/elements_test.py b/ferminet/tests/elements_test.py
new file mode 100644
index 0000000..c9bf705
--- /dev/null
+++ b/ferminet/tests/elements_test.py
@@ -0,0 +1,121 @@
+# Lint as: python3
+# Copyright 2019 DeepMind Technologies Limited. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for ferminet.utils.elements.""" + +from absl.testing import absltest +from absl.testing import parameterized + +from ferminet.utils import elements + + +class ElementsTest(parameterized.TestCase): + + def test_elements(self): + for n, element in elements.ATOMIC_NUMS.items(): + self.assertEqual(n, element.atomic_number) + self.assertEqual(elements.SYMBOLS[element.symbol], element) + if element.symbol in ['Li', 'Na', 'K', 'Rb', 'Cs', 'Fr']: + self.assertEqual(element.period, elements.ATOMIC_NUMS[n - 1].period + 1) + elif element.symbol != 'H': + self.assertEqual(element.period, elements.ATOMIC_NUMS[n - 1].period) + self.assertCountEqual( + (element.symbol for element in elements.ATOMIC_NUMS.values()), + elements.SYMBOLS.keys()) + self.assertCountEqual( + (element.atomic_number for element in elements.SYMBOLS.values()), + elements.ATOMIC_NUMS.keys()) + + @parameterized.parameters( + (elements.SYMBOLS['H'], 1, 1, 1), + (elements.SYMBOLS['He'], 1, 18, 0), + (elements.SYMBOLS['Li'], 2, 1, 1), + (elements.SYMBOLS['Be'], 2, 2, 0), + (elements.SYMBOLS['C'], 2, 14, 2), + (elements.SYMBOLS['N'], 2, 15, 3), + (elements.SYMBOLS['Al'], 3, 13, 1), + (elements.SYMBOLS['Zn'], 4, 12, -1), + (elements.SYMBOLS['Ga'], 4, 13, 1), + (elements.SYMBOLS['Kr'], 4, 18, 0), + (elements.SYMBOLS['Ce'], 6, -1, -1), + (elements.SYMBOLS['Ac'], 7, 3, -1), + ) + def test_element_group_period(self, element, period, group, spin_config): + # Validate subset of elements. See below for more thorough tests using + # properties of the periodic table. + self.assertEqual(element.period, period) + self.assertEqual(element.group, group) + if element.group == -1 or 3 <= element.group <= 12: + with self.assertRaises(NotImplementedError): + _ = element.spin_config + else: + self.assertEqual(element.spin_config, spin_config) + + def test_periods(self): + self.assertLen(elements.ATOMIC_NUMS, + sum(len(period) for period in elements.PERIODS.values())) + period_length = {1: 2, 2: 8, 3: 8, 4: 18, 5: 18, 6: 32, 7: 32} + for p, es in elements.PERIODS.items(): + self.assertLen(es, period_length[p]) + + def test_groups(self): + # Atomic numbers of first element in each period. + period_starts = sorted([ + period_elements[0].atomic_number + for period_elements in elements.PERIODS.values() + ]) + # Iterate over all elements in order of atomic number. Group should + # increment monotonically (except for accommodating absence of d block and + # presence of f block) and reset to 1 on the first element in each period. + for i in range(1, len(elements.ATOMIC_NUMS)+1): + element = elements.ATOMIC_NUMS[i] + if element.atomic_number in period_starts: + prev_group = 0 + fblock = 0 + if element.symbol == 'He': + # Full shell, not just full s subshell. + self.assertEqual(element.group, 18) + elif element.group == -1: + # Found a lanthanide (period 6) or actinide (period 7). + self.assertIn(element.period, [6, 7]) + fblock += 1 + elif element.atomic_number == 5 or element.atomic_number == 13: + # No d block (10 elements, groups 3-12) in periods 2 and 3. 
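+        # e.g. B (Z=5) follows Be (group 2), so its group is 2 + 11 = 13;
+        # likewise Al (Z=13) follows Mg (group 2).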
+ self.assertEqual(element.group, prev_group + 11) + else: + # Group should increment monotonically. + self.assertEqual(element.group, prev_group + 1) + if element.group != -1: + prev_group = element.group + self.assertGreaterEqual(prev_group, 1) + self.assertLessEqual(prev_group, 18) + if element.group == 4 and element.period > 6: + # Should have seen 14 lanthanides (period 6) or 14 actinides (period 7). + self.assertEqual(fblock, 14) + + # The periodic table (up to element 118) contains 7 periods. + # Hydrogen and Helium are placed in groups 1 and 18 respectively. + # Groups 1-2 (s-block) and 13-18 (p-block) are present in the second + # period onwards, groups 3-12 (d-block) the fourth period onwards. + # Check each group contains the expected number of elements. + nelements_in_group = [0]*18 + for element in elements.ATOMIC_NUMS.values(): + if element.group != -1: + nelements_in_group[element.group-1] += 1 + self.assertListEqual(nelements_in_group, [7, 6] + [4]*10 + [6]*5 + [7]) + + +if __name__ == '__main__': + absltest.main() diff --git a/ferminet/tests/hamiltonian_test.py b/ferminet/tests/hamiltonian_test.py new file mode 100644 index 0000000..b7e2106 --- /dev/null +++ b/ferminet/tests/hamiltonian_test.py @@ -0,0 +1,285 @@ +# Lint as: python3 +# Copyright 2018 DeepMind Technologies Limited. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for ferminet.hamiltonian.""" + +from ferminet import hamiltonian +from ferminet.utils import system +import numpy as np +import tensorflow.compat.v1 as tf + + +def _hydrogen(xs): + """Hydrogen (3D) atom ground state (1s) wavefunction. + + Energy: -1/2 hartrees. + + Args: + xs: tensor (batch_size, 3) of electron positions. + + Returns: + tensor (batch_size, 1) of the (unnormalised) wavefunction at each position. + """ + with tf.name_scope('Psi_H'): + return tf.exp(-tf.norm(xs, axis=1, keepdims=True)) + + +def _helium(xs): + """Compact parametrized Helium wavefunction. + + See https://opencommons.uconn.edu/chem_educ/30/ and Phys Rev A 74, 014501 + (2006). + + Energy: -2.901188 hartrees (compared to -2.9037243770 hartrees for the exact + ground state). + + Args: + xs: tensor (batch_size, 6) of electron positions. + + Returns: + tensor (batch_size, 1) of the (unnormalised) wavefunction at each pair of + positions. 
+ """ + with tf.name_scope('Psi_He'): + x1, x2 = tf.split(xs, 2, axis=1) + x1n = tf.norm(x1, axis=1, keepdims=True) + x2n = tf.norm(x2, axis=1, keepdims=True) + s = x1n + x2n + t = x1n - x2n + u = tf.norm(x1 - x2, axis=1, keepdims=True) + return (tf.exp(-2*s) + * (1 + 0.5*u*tf.exp(-1.013*u)) + * (1 + 0.2119*s*u + 0.1406*t*t - 0.003*u*u)) + + +def _grid_points(start, stop, npoints, ndim): + x1 = np.linspace(start, stop, npoints) + x2 = np.zeros(npoints) + g1 = np.array([x1] + [x2 for _ in range(ndim-1)]).T + g2 = np.array([x1, x1] + [x2 for _ in range(ndim-2)]).T + return np.ascontiguousarray(g1), np.ascontiguousarray(g2) + + +def _wfn_to_log(f): + def _logf(x): + psi = f(x) + return tf.log(tf.abs(psi)), tf.sign(psi) + return _logf + + +class WavefunctionTest(tf.test.TestCase): + """Basic tests for the toy wavefunctions defined above.""" + + def test_hydrogen(self): + grid1, grid2 = _grid_points(-1.1, 1.1, 21, 3) + psi_np = np.exp(-np.abs(grid1[:, 0])) + + psi = _hydrogen(tf.constant(grid1)) + with tf.train.MonitoredSession() as session: + psi_ = np.squeeze(session.run(psi)) + np.testing.assert_allclose(psi_, psi_np) + np.testing.assert_allclose(psi_, psi_[::-1]) + + psi = _hydrogen(tf.constant(grid2)) + with tf.train.MonitoredSession() as session: + psi_ = np.squeeze(session.run(psi)) + np.testing.assert_allclose(psi_, psi_np**np.sqrt(2)) + np.testing.assert_allclose(psi_, psi_[::-1]) + + def test_helium(self): + grid1, grid2 = _grid_points(-1.1, 1.0, 5, 6) + grid1[:, 3] += 0.5 # Avoid nuclear singularity for second electron + grid2[:, 3] += 0.5 # Avoid nuclear singularity for second electron + + # Exact results calculated by hand... + psi1 = np.array([0.07484748391, + 0.17087261364, + 0.42062736263, + 0.14476421949, + 0.06836260905]) + psi2 = np.array([0.037059842334, + 0.114854125198, + 0.403704643707, + 0.123475237250, + 0.040216273342]) + + psi = _helium(tf.constant(grid1)) + with tf.train.MonitoredSession() as session: + psi_ = np.squeeze(session.run(psi)) + np.testing.assert_allclose(psi_, psi1) + + # Check for symmetry: swap x and z and electron 1 and electron 2. + grid1 = np.flip(grid1, axis=1) + psi = _helium(tf.constant(grid1)) + with tf.train.MonitoredSession() as session: + psiz_ = np.squeeze(session.run(psi)) + np.testing.assert_allclose(psi_, psiz_) + + psi = _helium(tf.constant(grid2)) + with tf.train.MonitoredSession() as session: + psi_ = np.squeeze(session.run(psi)) + np.testing.assert_allclose(psi_, psi2) + + +class HamiltonianAtomTest(tf.test.TestCase): + + def test_hydrogen_atom(self): + xs = tf.random_uniform((6, 3), minval=-1.0, maxval=1.0) + atoms = [system.Atom(symbol='H', coords=(0, 0, 0))] + kin, pot = hamiltonian.operators(atoms, 1, 0.0) + logpsi = tf.log(tf.abs(_hydrogen(xs))) + e_loc = kin(logpsi, xs) + pot(xs) + with tf.train.MonitoredSession() as session: + e_loc_ = session.run(e_loc) + # Wavefunction is the ground state eigenfunction. Hence H\Psi = -1/2 \Psi. 
+ np.testing.assert_allclose(e_loc_, -0.5, rtol=1.e-6, atol=1.e-7) + + def test_helium_atom(self): + x1 = np.linspace(-1, 1, 6, dtype=np.float32) + x2 = np.zeros(6, dtype=np.float32) + 0.25 + x12 = np.array([x1] + [x2 for _ in range(5)]).T + atoms = [system.Atom(symbol='He', coords=(0, 0, 0))] + hpsi_test = np.array([ + -0.25170374, -0.43359983, -0.61270618, -1.56245542, -0.39977553, + -0.2300467 + ]) + + log_helium = _wfn_to_log(_helium) + h = hamiltonian.exact_hamiltonian(atoms, 2) + xs = tf.constant(x12) + _, hpsi = h(log_helium, xs) + with tf.train.MonitoredSession() as session: + hpsi_ = session.run(hpsi) + + xs = tf.reverse(xs, axis=[1]) + _, hpsi2 = h(log_helium, xs) + with tf.train.MonitoredSession() as session: + hpsi2_ = session.run(hpsi2) + + np.testing.assert_allclose(hpsi_[:, 0], hpsi_test, rtol=1.e-6) + np.testing.assert_allclose(hpsi_, hpsi2_, rtol=1.e-6) + + +class HamiltonianMoleculeTest(tf.test.TestCase): + + def setUp(self): + super(HamiltonianMoleculeTest, self).setUp() + self.rh1 = np.array([0, 0, 0], dtype=np.float32) + self.rh2 = np.array([0, 0, 200], dtype=np.float32) + self.rhh = self.rh2 - self.rh1 + self.dhh = 1.0 / np.sqrt(np.dot(self.rhh, self.rhh)) + self.xs = (2 * np.random.random((6, 3)) - 1).astype(np.float32) + self.xs = [self.xs + self.rh1, self.xs + self.rh2] + self.atoms = [ + system.Atom(symbol='H', coords=self.rh1), + system.Atom(symbol='H', coords=self.rh2) + ] + + def test_hydrogen_molecular_ion(self): + + def h2_psi(xs): + # Trial wavefunction as linear combination of H 1s wavefunctions centered + # on each atom. + return _hydrogen(xs - self.rh1) + _hydrogen(xs - self.rh2) + + h = hamiltonian.exact_hamiltonian(self.atoms, 1, 0.0) + # H2+; 1 electron. Use points around both nuclei. + xs = tf.constant(np.concatenate(self.xs, axis=0)) + psi, hpsi = h(_wfn_to_log(h2_psi), xs) + with tf.train.MonitoredSession() as session: + psi_, hpsi_ = session.run([psi, hpsi]) + # Leading order correction to energy is nuclear interaction with the far + # nucleus. + np.testing.assert_allclose( + hpsi_, -(0.5 + self.dhh) * psi_, rtol=self.dhh, atol=self.dhh) + + def test_hydrogen_molecule(self): + + def h2_psi(xs): + x1, x2 = tf.split(xs, 2, axis=1) + # Essentially a non-interacting system. + return _hydrogen(x1 - self.rh1) * _hydrogen(x2 - self.rh2) + + h = hamiltonian.exact_hamiltonian(self.atoms, 2, 0.0) + # H2; 2 electrons. Place one electron around each nucleus. + xs = tf.constant(np.concatenate(self.xs, axis=1)) + psi, hpsi = h(_wfn_to_log(h2_psi), xs) + with tf.train.MonitoredSession() as session: + psi_, hpsi_ = session.run([psi, hpsi]) + np.testing.assert_allclose( + hpsi_, -(1 + self.dhh) * psi_, rtol=self.dhh, atol=self.dhh) + + +class R12FeaturesTest(tf.test.TestCase): + + def test_r12_features_atom1(self): + atoms = [system.Atom(symbol='H', coords=(0, 0, 0))] + one = np.ones((1, 3)) + xs = tf.constant(one, dtype=tf.float32) + xs12 = hamiltonian.r12_features(xs, atoms, 1, flatten=True) + with tf.train.MonitoredSession() as session: + xs12_ = session.run(xs12) + # Should add |x|. 
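+    # With flatten=True the features are the scalar distance(s) followed by
+    # the raw positions, so one electron at (1, 1, 1) gives
+    # [sqrt(3), 1, 1, 1].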
+ x_test = np.concatenate([[[np.linalg.norm(one[0])]], one], axis=1) + np.testing.assert_allclose(xs12_, x_test) + + def test_r12_features_atom2(self): + atoms = [system.Atom(symbol='He', coords=(0, 0, 0))] + one = np.concatenate([np.ones((1, 3)), 0.5 * np.ones((1, 3))], axis=1) + xs = tf.constant(one, dtype=tf.float32) + xs12 = hamiltonian.r12_features(xs, atoms, 2, flatten=True) + with tf.train.MonitoredSession() as session: + xs12_ = session.run(xs12) + # Should add |x_1|, |x_2|, |x1-x2| + norm = np.linalg.norm + x_test = np.concatenate([ + [[ + norm(one[0, :3]), + norm(one[0, 3:]), + norm(one[0, :3] - one[0, 3:]), + ]], + one + ], + axis=1) + np.testing.assert_allclose(xs12_, x_test) + + def test_r12_features_molecule(self): + atoms = [ + system.Atom(symbol='H', coords=(0, 0, 0)), + system.Atom(symbol='H', coords=(1, 0, 0)) + ] + one = np.concatenate([np.ones((1, 3)), 0.5 * np.ones((1, 3))], axis=1) + xs = tf.constant(one, dtype=tf.float32) + xs12 = hamiltonian.r12_features(xs, atoms, 2, flatten=True) + with tf.train.MonitoredSession() as session: + xs12_ = session.run(xs12) + # Should add |x_11|, |x_21|, |x_12|, |x_22|, |x1-x2| + norm = np.linalg.norm + x_test = np.concatenate([ + [[ + norm(one[0, :3]), + norm(one[0, :3] - atoms[1].coords), + norm(one[0, 3:]), + norm(one[0, 3:] - atoms[1].coords), + norm(one[0, :3] - one[0, 3:]), + ]], + one + ], + axis=1) + np.testing.assert_allclose(xs12_, x_test, rtol=1e-6) + + +if __name__ == '__main__': + tf.test.main() diff --git a/ferminet/tests/mcmc_test.py b/ferminet/tests/mcmc_test.py new file mode 100644 index 0000000..aa87fb1 --- /dev/null +++ b/ferminet/tests/mcmc_test.py @@ -0,0 +1,119 @@ +# Lint as: python3 +# Copyright 2020 DeepMind Technologies Limited. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for ferminet.mcmc.""" + +from absl.testing import parameterized + +from ferminet import hamiltonian +from ferminet import mcmc +from ferminet.utils import system +import numpy as np +import tensorflow.compat.v1 as tf + + +def _hydrogen(xs): + """Hydrogen (3D) atom ground state (1s) wavefunction. + + Energy: -1/2 hartrees. + + Args: + xs: tensor (batch_size, 3) of electron positions. + + Returns: + tensor (batch_size, 1) of the (unnormalised) wavefunction at each position. + """ + with tf.name_scope('Psi_H'): + return tf.exp(-tf.norm(xs, axis=1, keepdims=True)) + + +def _helium(xs): + """Compact parametrized Helium wavefunction. + + See https://opencommons.uconn.edu/chem_educ/30/ and Phys Rev A 74, 014501 + (2006). + + Energy: -2.901188 hartrees (compared to -2.9037243770 hartrees for the exact + ground state). + + Args: + xs: tensor (batch_size, 6) of electron positions. + + Returns: + tensor (batch_size, 1) of the (unnormalised) wavefunction at each pair of + positions. 
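+
+  The (unnormalised) functional form, in the Hylleraas coordinates
+  s = r1 + r2, t = r1 - r2 and u = |x1 - x2|, is
+
+    psi = exp(-2s) * (1 + 0.5*u*exp(-1.013*u))
+          * (1 + 0.2119*s*u + 0.1406*t^2 - 0.003*u^2)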
+  """
+  with tf.name_scope('Psi_He'):
+    x1, x2 = tf.split(xs, 2, axis=1)
+    x1n = tf.norm(x1, axis=1, keepdims=True)
+    x2n = tf.norm(x2, axis=1, keepdims=True)
+    s = x1n + x2n
+    t = x1n - x2n
+    u = tf.norm(x1 - x2, axis=1, keepdims=True)
+    return (tf.exp(-2*s)
+            * (1 + 0.5*u*tf.exp(-1.013*u))
+            * (1 + 0.2119*s*u + 0.1406*t*t - 0.003*u*u))
+
+
+def _run_mcmc(atoms,
+              nelectrons,
+              net,
+              batch_size=1024,
+              steps=10,
+              dtype=tf.float32):
+  gen = mcmc.MCMC(
+      net,
+      batch_size,
+      [0] * 3 * nelectrons,
+      1.0,
+      0.1,
+      dtype=dtype)
+  kin, pot = hamiltonian.operators(atoms, nelectrons, 0.0)
+  walkers = tf.squeeze(gen.walkers)
+  psi, _ = net(walkers)
+  e_loc = tf.reduce_sum(kin(psi, walkers) + pot(walkers)) / batch_size
+  e = []
+  mcmc_step = gen.step()
+  with tf.train.MonitoredSession() as session:
+    for _ in range(steps):
+      session.run(mcmc_step)
+      e.append(session.run(e_loc))
+  return np.array(e)
+
+
+class McmcTest(tf.test.TestCase, parameterized.TestCase):
+
+  @parameterized.parameters(
+      {'dtype': tf.float32},
+      {'dtype': tf.float64},
+  )
+  def test_hydrogen_atom(self, dtype):
+    atoms = [system.Atom(symbol='H', coords=(0, 0, 0))]
+    def net(x):
+      psi = _hydrogen(x)
+      return (tf.log(tf.abs(psi)), tf.sign(psi))
+    e = _run_mcmc(atoms, 1, net, dtype=dtype)
+    np.testing.assert_allclose(e, -np.ones_like(e) / 2)
+
+  def test_helium_atom(self):
+    atoms = [system.Atom(symbol='He', coords=(0, 0, 0))]
+    def net(x):
+      psi = _helium(x)
+      return (tf.log(tf.abs(psi)), tf.sign(psi))
+    e = _run_mcmc(atoms, 2, net, steps=500)
+    np.testing.assert_allclose(e[100:].mean(), -2.901188, atol=5.e-3)
+
+
+if __name__ == '__main__':
+  tf.test.main()
diff --git a/ferminet/tests/networks_test.py b/ferminet/tests/networks_test.py
new file mode 100644
index 0000000..a7d45fd
--- /dev/null
+++ b/ferminet/tests/networks_test.py
@@ -0,0 +1,394 @@
+# Lint as: python3
+# Copyright 2019 DeepMind Technologies Limited. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for ferminet.networks."""
+
+from absl.testing import parameterized
+from ferminet import networks
+from ferminet.utils import scf
+from ferminet.utils import system
+import numpy as np
+import pyscf
+import tensorflow.compat.v1 as tf
+
+
+class NetworksTest(tf.test.TestCase, parameterized.TestCase):
+
+  def setUp(self):
+    super(NetworksTest, self).setUp()
+    # disable use of temp directory in pyscf.
+    # Test calculations are small enough to fit in RAM and we don't need
+    # checkpoint files.
+ pyscf.lib.param.TMPDIR = None + + @parameterized.parameters( + { + 'hidden_units': [8, 8], + 'after_det': [8, 8, 2], + }, + { + 'hidden_units': [[8, 8], [8, 8]], + 'after_det': [8, 8, 2], + }, + ) + def test_ferminet(self, hidden_units, after_det): + """Check that FermiNet is actually antisymmetric.""" + atoms = [ + system.Atom(symbol='H', coords=(0, 0, -1.0)), + system.Atom(symbol='H', coords=(0, 0, 1.0)), + ] + ne = (3, 2) + x1 = tf.random_normal([3, 3 * sum(ne)]) + xs = tf.split(x1, sum(ne), axis=1) + + # swap indices to test antisymmetry + x2 = tf.concat([xs[1], xs[0]] + xs[2:], axis=1) + x3 = tf.concat( + xs[:ne[0]] + [xs[ne[0] + 1], xs[ne[0]]] + xs[ne[0] + 2:], axis=1) + + ferminet = networks.FermiNet( + atoms=atoms, + nelectrons=ne, + slater_dets=4, + hidden_units=hidden_units, + after_det=after_det) + y1 = ferminet(x1) + y2 = ferminet(x2) + y3 = ferminet(x3) + with tf.train.MonitoredSession() as session: + out1, out2, out3 = session.run([y1, y2, y3]) + np.testing.assert_allclose(out1, -out2, rtol=4.e-5, atol=1.e-6) + np.testing.assert_allclose(out1, -out3, rtol=4.e-5, atol=1.e-6) + + def test_ferminet_mask(self): + """Check that FermiNet with a decaying mask on the output works.""" + atoms = [ + system.Atom(symbol='H', coords=(0, 0, -1.0)), + system.Atom(symbol='H', coords=(0, 0, 1.0)), + ] + ne = (3, 2) + hidden_units = [[8, 8], [8, 8]] + after_det = [8, 8, 2] + x = tf.random_normal([3, 3 * sum(ne)]) + + ferminet = networks.FermiNet( + atoms=atoms, + nelectrons=ne, + slater_dets=4, + hidden_units=hidden_units, + after_det=after_det, + envelope=True) + y = ferminet(x) + with tf.train.MonitoredSession() as session: + session.run(y) + + @parameterized.parameters(1, 3) + def test_ferminet_size(self, size): + atoms = [ + system.Atom(symbol='H', coords=(0, 0, -1.0)), + system.Atom(symbol='H', coords=(0, 0, 1.0)), + ] + ne = (size, size) + nout = 1 + batch_size = 3 + ndet = 4 + hidden_units = [[8, 8], [8, 8]] + after_det = [8, 8, nout] + x = tf.random_normal([batch_size, 3 * sum(ne)]) + + ferminet = networks.FermiNet( + atoms=atoms, + nelectrons=ne, + slater_dets=ndet, + hidden_units=hidden_units, + after_det=after_det) + y = ferminet(x) + for i in range(len(ne)): + self.assertEqual(ferminet._dets[i].shape.as_list(), [batch_size, ndet]) + self.assertEqual(ferminet._orbitals[i].shape.as_list(), + [batch_size, ndet, size, size]) + self.assertEqual(y.shape.as_list(), [batch_size, nout]) + + @parameterized.parameters( + { + 'hidden_units': [8, 8], + 'after_det': [8, 8, 2] + }, + { + 'hidden_units': [[8, 8], [8, 8]], + 'after_det': [8, 8, 2] + }, + ) + def test_ferminet_pretrain(self, hidden_units, after_det): + """Check that FermiNet pretraining runs.""" + atoms = [ + system.Atom(symbol='Li', coords=(0, 0, -1.0)), + system.Atom(symbol='Li', coords=(0, 0, 1.0)), + ] + ne = (4, 2) + x = tf.random_normal([1, 10, 3 * sum(ne)]) + + strategy = tf.distribute.get_strategy() + + with strategy.scope(): + ferminet = networks.FermiNet( + atoms=atoms, + nelectrons=ne, + slater_dets=4, + hidden_units=hidden_units, + after_det=after_det, + pretrain_iterations=10) + + # Test Hartree fock pretraining - no change of position. 
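+    # Pretraining minimises the difference between the network orbitals and
+    # the (fixed) Hartree-Fock orbitals evaluated at the sampled positions x.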
+ hf_approx = scf.Scf(atoms, nelectrons=ne) + hf_approx.run() + pretrain_op_hf = networks.pretrain_hartree_fock(ferminet, x, strategy, + hf_approx) + self.assertEqual(ferminet.pretrain_iterations, 10) + with tf.train.MonitoredSession() as session: + for _ in range(ferminet.pretrain_iterations): + session.run(pretrain_op_hf) + + +class LogDetMatmulTest(tf.test.TestCase, parameterized.TestCase): + + def _make_mats(self, n, m, l1, l2, k, singular=False): + x1 = np.random.randn(n, m, l1, l1).astype(np.float32) + if singular: + # Make one matrix singular + x1[0, 0, 0] = 2.0 * x1[0, 0, 1] + x2 = np.random.randn(n, m, l2, l2).astype(np.float32) + w = np.random.randn(m, k).astype(np.float32) + + x1_tf = tf.constant(x1) + x2_tf = tf.constant(x2) + w_tf = tf.constant(w) + + output = networks.logdet_matmul(x1_tf, x2_tf, w_tf) + return x1, x2, w, x1_tf, x2_tf, w_tf, output + + def _cofactor(self, x): + u, s, v = np.linalg.svd(x) + ss = np.tile(s[..., None], [1, s.shape[0]]) + np.fill_diagonal(ss, 1.0) + z = np.prod(ss, axis=0) + return np.dot(u, np.dot(np.diag(z), v)).transpose() + + @parameterized.parameters(True, False) + def test_logdet_matmul(self, singular): + n = 4 + m = 5 + l1 = 3 + l2 = 4 + k = 2 + x1, x2, w, _, _, _, output = self._make_mats( + n, m, l1, l2, k, singular=singular) + + with tf.Session() as session: + logout, signout = session.run(output) + tf_result = np.exp(logout) * signout + det1 = np.zeros([n, m], dtype=np.float32) + det2 = np.zeros([n, m], dtype=np.float32) + for i in range(n): + for j in range(m): + det1[i, j] = np.linalg.det(x1[i, j]) + det2[i, j] = np.linalg.det(x2[i, j]) + np_result = np.dot(det1 * det2, w) + np.testing.assert_allclose(np_result, tf_result, atol=1e-5, rtol=1e-5) + + @parameterized.parameters(True, False) + def test_logdet_matmul_grad(self, singular): + n = 4 + m = 5 + l1 = 3 + l2 = 4 + k = 2 + result = self._make_mats(n, m, l1, l2, k, singular=singular) + x1, x2, w, x1_tf, x2_tf, w_tf, output = result + + with tf.Session(): + theor_x1, numer_x1 = tf.test.compute_gradient( + x1_tf, [n, m, l1, l1], output[0], [n, k], x_init_value=x1) + np.testing.assert_allclose(theor_x1, numer_x1, atol=1e-2, rtol=1e-3) + + theor_x2, numer_x2 = tf.test.compute_gradient( + x2_tf, [n, m, l2, l2], output[0], [n, k], x_init_value=x2) + np.testing.assert_allclose(theor_x2, numer_x2, atol=1e-2, rtol=1e-3) + + theor_w, numer_w = tf.test.compute_gradient( + w_tf, [m, k], output[0], [n, k], x_init_value=w) + np.testing.assert_allclose(theor_w, numer_w, atol=1e-2, rtol=1e-3) + + @parameterized.parameters(True, False) + def test_logdet_matmul_grad_grad(self, singular): + n = 2 + m = 3 + l1 = 2 + l2 = 3 + k = 2 + result = self._make_mats(n, m, l1, l2, k, singular=singular) + x1, x2, w, x1_tf, x2_tf, w_tf, output = result + glog = np.random.randn(*(output[0].shape.as_list())).astype(np.float32) + glog_tf = tf.constant(glog) + grad_op = tf.gradients(output[0], [x1_tf, x2_tf, w_tf], grad_ys=glog_tf) + inp_op = [x1_tf, x2_tf, w_tf, glog_tf] + inp_np = [x1, x2, w, glog] + + with tf.Session(): + for i in range(len(inp_op)): + for j in range(len(grad_op)): + theor, numer = tf.test.compute_gradient( + inp_op[i], + inp_op[i].shape, + grad_op[j], + grad_op[j].shape, + x_init_value=inp_np[i]) + np.testing.assert_allclose(theor, numer, atol=1e-2, rtol=1.3e-3) + + +class AdjugateGradTest(tf.test.TestCase, parameterized.TestCase): + + @parameterized.parameters( + (2, False), + (2, True), + (8, False), + (8, True), + ) + def test_grad_adj(self, dim, singular): + + def cofactor_tf(xs): + s, u, v = 
tf.linalg.svd(xs) + return networks.cofactor(u, s, v) + + @tf.custom_gradient + def cofactor_tf_custom(xs): + s, u, v = tf.linalg.svd(xs) + + def grad(dy): + r = networks.rho(s, 0.0) + return networks.grad_cofactor(u, v, r, dy) + + return networks.cofactor(u, s, v), grad + + n, m = 6, 3 + x = np.random.randn(n, m, dim, dim).astype(np.float32) + if singular: + # Make one matrix singular + x[0, 0, 0] = 2.0 * x[0, 0, 1] + x_tf = tf.constant(x) + adj1 = cofactor_tf(x_tf) + grad_adj_backprop = tf.gradients(adj1, x_tf) + adj2 = cofactor_tf_custom(x_tf) + grad_adj_closed = tf.gradients(adj2, x_tf) + + with tf.train.MonitoredSession() as session: + backprop, closed = session.run([grad_adj_backprop, grad_adj_closed]) + + np.testing.assert_allclose(backprop, closed, atol=1.e-5, rtol=1.e-3) + + +class GammaTest(tf.test.TestCase, parameterized.TestCase): + + @parameterized.parameters( + ((5,), 0), + ((5,), 2), + (( + 12, + 6, + 5, + ), 0), + (( + 12, + 1, + 8, + ), 2), + (( + 12, + 8, + 8, + ), 2), + ) + def test_gamma(self, shape, nsingular): + + s = tf.Variable(tf.random_uniform(shape)) + if nsingular > 0: + zeros = list(shape) + zeros[-1] = nsingular + s = s[..., shape[-1] - nsingular:].assign(tf.zeros(zeros)) + + g = networks.gamma(s) + + with tf.train.MonitoredSession() as session: + s_, g_ = session.run([s, g]) + + s_flat = np.reshape(s_, (-1, shape[-1])) + gamma_exact = np.ones_like(s_flat) + for i, row in enumerate(s_flat): + for j in range(shape[-1]): + row[j], r_j = 1.0, row[j] + gamma_exact[i, j] = np.prod(row) + row[j] = r_j + gamma_exact = np.reshape(gamma_exact, g_.shape) + np.testing.assert_allclose(g_, gamma_exact, atol=1.e-7) + + +class RhoTest(tf.test.TestCase, parameterized.TestCase): + + @parameterized.parameters( + (None, 2, 0), + (None, 5, 0), + (None, 5, 2), + ((8, 4), 5, 2), + ) + def test_rho(self, tile, dim, nsingular): + + assert dim > 0 + assert dim > nsingular + s = tf.Variable(tf.random_uniform((dim,))) + s = s[dim - nsingular:].assign(tf.zeros(nsingular)) + + s_batch = s + if tile: + for _ in range(len(tile)): + s_batch = tf.expand_dims(s_batch, 0) + s_batch = tf.tile(s_batch, list(tile) + [1]) + + r = networks.rho(s) + r_batch = networks.rho(s_batch) + + with tf.train.MonitoredSession() as session: + s_, r_, r_batch_ = session.run([s, r, r_batch]) + + rho_exact = np.zeros((dim, dim), dtype=np.float32) + for i in range(dim - 1): + s_[i], s_i = 1.0, s_[i] + for j in range(i + 1, dim): + s_[j], s_j = 1.0, s_[j] + rho_exact[i, j] = np.prod(s_) + s_[j] = s_j + s_[i] = s_i + rho_exact = rho_exact + np.transpose(rho_exact) + + atol = 1e-5 + rtol = 1e-5 + np.testing.assert_allclose(r_, rho_exact, atol=atol, rtol=rtol) + + if tile: + r_batch_ = np.reshape(r_batch_, (-1, dim, dim)) + for i in range(r_batch_[0].shape[0]): + np.testing.assert_allclose(r_batch_[i], rho_exact, atol=atol, rtol=rtol) + + +if __name__ == '__main__': + tf.test.main() diff --git a/ferminet/tests/scf_test.py b/ferminet/tests/scf_test.py new file mode 100644 index 0000000..1bf7f37 --- /dev/null +++ b/ferminet/tests/scf_test.py @@ -0,0 +1,238 @@ +# Lint as: python3 +# Copyright 2019 DeepMind Technologies Limited. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tests for ferminet.scf."""
+
+from typing import List, Tuple
+from absl.testing import parameterized
+from ferminet.utils import scf
+from ferminet.utils import system
+import numpy as np
+import pyscf
+import tensorflow.compat.v1 as tf
+
+
+def create_o2_hf(bond_length):
+  molecule = [
+      system.Atom('O', (0, 0, 0)),
+      system.Atom('O', (0, 0, bond_length))
+  ]
+  spin = 2
+  oxygen_atomic_number = 8
+  nelectrons = [oxygen_atomic_number + spin, oxygen_atomic_number - spin]
+  hf = scf.Scf(molecule=molecule, nelectrons=nelectrons, restricted=False)
+  return hf
+
+
+class ScfTest(tf.test.TestCase, parameterized.TestCase):
+
+  def setUp(self):
+    super(ScfTest, self).setUp()
+    # disable use of temp directory in pyscf.
+    # Test calculations are small enough to fit in RAM and we don't need
+    # checkpoint files.
+    pyscf.lib.param.TMPDIR = None
+
+  @parameterized.parameters(
+      {
+          'molecule': [system.Atom('He', (0, 0, 0))],
+          'nelectrons': (1, 1)
+      },
+      {
+          'molecule': [system.Atom('N', (0, 0, 0))],
+          'nelectrons': (5, 2)
+      },
+      {
+          'molecule': [system.Atom('N', (0, 0, 0))],
+          'nelectrons': (5, 3)
+      },
+      {
+          'molecule': [system.Atom('N', (0, 0, 0))],
+          'nelectrons': (4, 2)
+      },
+      {
+          'molecule': [system.Atom('O', (0, 0, 0))],
+          'nelectrons': (5, 3),
+          'restricted': False,
+      },
+      {
+          'molecule': [
+              system.Atom('N', (0, 0, 0)),
+              system.Atom('N', (0, 0, 1.4))
+          ],
+          'nelectrons': (7, 7)
+      },
+      {
+          'molecule': [
+              system.Atom('O', (0, 0, 0)),
+              system.Atom('O', (0, 0, 1.4))
+          ],
+          'nelectrons': (9, 7),
+          'restricted': False,
+      },
+  )
+  def test_scf_interface(self,
+                         molecule: List[system.Atom],
+                         nelectrons: Tuple[int, int],
+                         restricted: bool = True):
+    """Tests SCF interface to a pyscf calculation.
+
+    pyscf has its own tests so only check that we can run calculations over
+    atoms and simple diatomics using the interface in ferminet.scf.
+
+    Args:
+      molecule: List of system.Atom objects giving atoms in the molecule.
+      nelectrons: Tuple containing number of alpha and beta electrons.
+      restricted: If true, run a restricted Hartree-Fock calculation, otherwise
+        run an unrestricted Hartree-Fock calculation.
+    """
+    npts = 100
+    xs = np.random.randn(npts, 3)
+    hf = scf.Scf(molecule=molecule,
+                 nelectrons=nelectrons,
+                 restricted=restricted)
+    hf.run()
+    mo_vals = hf.eval_mos(xs)
+    self.assertLen(mo_vals, 2)  # alpha-spin orbitals and beta-spin orbitals.
+    for spin_mo_vals in mo_vals:
+      # Evaluate npts points on M orbitals/functions - (npts, M) array.
+      self.assertEqual(spin_mo_vals.shape, (npts, hf._mol.nao_nr()))
+
+  @parameterized.parameters(np.float32, np.float64)
+  def test_tf_eval_mos(self, dtype):
+    """Tests tensorflow op for evaluating Hartree-Fock orbitals.
+
+    Args:
+      dtype: numpy type to use for position vectors.
+    """
+    xs = np.random.randn(100, 3).astype(dtype)
+    hf = create_o2_hf(1.4)
+    hf.run()
+
+    # Evaluate MOs at positions xs directly in pyscf.
+    # hf.eval_mos always returns np.float64 arrays. Cast to dtype for a
+    # like-for-like comparison.
+    mo_vals = [mo_val.astype(dtype) for mo_val in hf.eval_mos(xs, deriv=True)]
+
+    # Evaluate MOs as a tensorflow op.
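+    # Differentiating the sum of squared orbitals gives sum_i 2 phi_i dphi_i/dx
+    # via backprop, which is compared against the analytic pyscf derivatives
+    # below.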
+ tf_xs = tf.constant(xs) + tf_mo_vals = hf.tf_eval_mos(tf_xs, deriv=True) + tf_dmo_vals = tf.gradients(tf.square(tf_mo_vals), tf_xs)[0] + tf_dmo_vals_shape = tf.shape(tf_dmo_vals) + with tf.train.MonitoredSession() as session: + tf_mo_vals_, tf_dmo_vals_ = session.run([tf_mo_vals, tf_dmo_vals]) + tf_dmo_vals_shape_ = session.run(tf_dmo_vals_shape) + tf_mo_vals_ = np.split(tf_mo_vals_, 2, axis=-1) + + for np_val, tf_val in zip(mo_vals, tf_mo_vals_): + self.assertEqual(np_val.dtype, tf_val.dtype) + np.testing.assert_allclose(np_val[0], tf_val) + + np.testing.assert_array_equal(tf_dmo_vals_shape_, xs.shape) + np.testing.assert_array_equal(tf_dmo_vals_shape_, tf_dmo_vals_.shape) + # Compare analytic derivative of orbital^2 with tensorflow backprop. + # Sum over the orbital index and spin channel because of definition of + # tensorflow gradients. + np_deriv = sum( + np.sum(2 * np_val[1:] * np.expand_dims(np_val[0], 0), axis=-1) + for np_val in mo_vals) + # Tensorflow returns (N,3) but pyscf returns (3, N, M). + np_deriv = np.transpose(np_deriv) + self.assertEqual(np_deriv.dtype, tf_dmo_vals_.dtype) + if dtype == np.float32: + atol = 1e-6 + rtol = 1e-6 + else: + atol = 0 + rtol = 1.e-7 + np.testing.assert_allclose(np_deriv, tf_dmo_vals_, atol=atol, rtol=rtol) + + def test_tf_eval_mos_deriv(self): + + hf = create_o2_hf(1.4) + hf.run() + xs = tf.random_normal((100, 3)) + tf_mos_derivs = hf.tf_eval_mos(xs, deriv=True) + tf_mos = hf.tf_eval_mos(xs, deriv=False) + with tf.train.MonitoredSession() as session: + tf_mos_, tf_mos_derivs_ = session.run([tf_mos, tf_mos_derivs]) + np.testing.assert_allclose(tf_mos_, tf_mos_derivs_) + + def test_tf_eval_hf(self): + + # Check we get consistent answers between multiple calls to eval_mos and + # a single tf_eval_hf call. + molecule = [system.Atom('O', (0, 0, 0))] + nelectrons = (5, 3) + hf = scf.Scf(molecule=molecule, + nelectrons=nelectrons, + restricted=False) + hf.run() + + batch = 100 + xs = [np.random.randn(batch, 3) for _ in range(sum(nelectrons))] + mos = [] + for i, x in enumerate(xs): + ispin = 0 if i < nelectrons[0] else 1 + orbitals = hf.eval_mos(x)[ispin] + # Select occupied orbitals via Aufbau. + mos.append(orbitals[:, :nelectrons[ispin]]) + np_mos = (np.stack(mos[:nelectrons[0]], axis=1), + np.stack(mos[nelectrons[0]:], axis=1)) + + tf_xs = tf.constant(np.stack(xs, axis=1)) + tf_mos = hf.tf_eval_hf(tf_xs, deriv=True) + with tf.train.MonitoredSession() as session: + tf_mos_ = session.run(tf_mos) + + for i, (np_mos_mat, tf_mos_mat) in enumerate(zip(np_mos, tf_mos_)): + self.assertEqual(np_mos_mat.shape, tf_mos_mat.shape) + self.assertEqual(np_mos_mat.shape, (batch, nelectrons[i], nelectrons[i])) + np.testing.assert_allclose(np_mos_mat, tf_mos_mat) + + def test_tf_eval_slog_wavefuncs(self): + + # Check TensorFlow evaluation runs and gives correct shapes. 
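+    # Each slog method returns the pair (log|psi|, sign(psi)); the Hartree
+    # product check below verifies that exp(log|psi|) * sign recovers the
+    # plain product wavefunction.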
+ molecule = [system.Atom('O', (0, 0, 0))] + nelectrons = (5, 3) + total_electrons = sum(nelectrons) + num_spatial_dim = 3 + hf = scf.Scf(molecule=molecule, + nelectrons=nelectrons, + restricted=False) + hf.run() + + batch = 100 + rng = np.random.RandomState(1) + flat_positions_np = rng.randn(batch, + total_electrons * num_spatial_dim) + + flat_positions_tf = tf.constant(flat_positions_np) + for method in [hf.tf_eval_slog_slater_determinant, + hf.tf_eval_slog_hartree_product]: + slog_wavefunc, signs = method(flat_positions_tf) + with tf.train.MonitoredSession() as session: + slog_wavefunc_, signs_ = session.run([slog_wavefunc, signs]) + self.assertEqual(slog_wavefunc_.shape, (batch, 1)) + self.assertEqual(signs_.shape, (batch, 1)) + hartree_product = hf.tf_eval_hartree_product(flat_positions_tf) + with tf.train.MonitoredSession() as session: + hartree_product_ = session.run(hartree_product) + np.testing.assert_allclose(hartree_product_, + np.exp(slog_wavefunc_) * signs_) + + +if __name__ == '__main__': + tf.test.main() diff --git a/ferminet/tests/system_test.py b/ferminet/tests/system_test.py new file mode 100644 index 0000000..cb28f98 --- /dev/null +++ b/ferminet/tests/system_test.py @@ -0,0 +1,100 @@ +# Lint as: python3 +# Copyright 2019 DeepMind Technologies Limited. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for ferminet.utils.system.""" + +from absl.testing import absltest +from absl.testing import parameterized +from ferminet.utils import system +from ferminet.utils import units +import numpy as np + + +class SystemAtomCoordsTest(absltest.TestCase): + + def test_atom_coords(self): + xs = np.random.uniform(size=3) + atom = system.Atom(symbol='H', coords=xs, units='angstrom') + np.testing.assert_allclose(atom.coords / xs, [units.BOHR_ANGSTROM]*3) + np.testing.assert_allclose(atom.coords_angstrom, xs) + + def test_atom_units(self): + system.Atom(symbol='H', coords=[1, 2, 3], units='bohr') + system.Atom(symbol='H', coords=[1, 2, 3], units='angstrom') + with self.assertRaises(ValueError): + system.Atom(symbol='H', coords=[1, 2, 3], units='dummy') + + +class SystemCreationsTest(parameterized.TestCase): + + @parameterized.parameters( + {'symbol': 'He', 'charge': 0}, + {'symbol': 'C', 'charge': 0}, + {'symbol': 'Ne', 'charge': 0}, + {'symbol': 'Ne', 'charge': 1}, + {'symbol': 'Ne', 'charge': -1}, + ) + def test_create_atom(self, symbol, charge): + mol, spins = system.atom(symbol, charge=charge) + self.assertLen(mol, 1) + self.assertEqual(mol[0].symbol, symbol) + self.assertEqual(sum(spins), mol[0].atomic_number - charge) + np.testing.assert_allclose(np.asarray(mol[0].coords), np.zeros(3)) + + @parameterized.parameters( + {'symbol': 'LiH'}, + {'symbol': 'Li2'}, + {'symbol': 'N2'}, + {'symbol': 'CO'}, + {'symbol': 'CH4'}, + {'symbol': 'NH3'}, + {'symbol': 'C2H4'}, + {'symbol': 'C4H6'}, + ) + def test_create_molecule(self, symbol): + _, _ = system.molecule(symbol) + + @parameterized.parameters( + {'n': 10, 'r': 1.0}, + {'n': 11, 'r': 1.0}, + {'n': 20, 'r': 2.0}, + ) + def test_create_hydrogen_chain(self, n, r): + mol, spins = system.hn(n, r) + self.assertLen(mol, n) + for atom in mol: + self.assertAlmostEqual(atom.coords[0], 0) + self.assertAlmostEqual(atom.coords[1], 0) + for atom1, atom2 in zip(mol[:-1], mol[1:]): + self.assertAlmostEqual(atom2.coords[2] - atom1.coords[2], r) + self.assertEqual(spins, (n - n // 2, n // 2)) + + @parameterized.parameters( + {'r': 1.0, 'angle': np.pi/4.0}, + {'r': 1.0, 'angle': np.pi/6.0}, + {'r': 2.0, 'angle': np.pi/4.0}, + ) + def test_create_hydrogen_circle(self, r, angle): + mol, spins = system.h4_circle(r, angle) + self.assertEqual(spins, (2, 2)) + self.assertLen(mol, 4) + for atom in mol: + self.assertAlmostEqual(atom.coords[2], 0) + theta = np.abs(np.arctan(atom.coords[1] / atom.coords[0])) + self.assertAlmostEqual(theta, angle) + + +if __name__ == '__main__': + absltest.main() diff --git a/ferminet/tests/train_test.py b/ferminet/tests/train_test.py new file mode 100644 index 0000000..7daf988 --- /dev/null +++ b/ferminet/tests/train_test.py @@ -0,0 +1,268 @@ +# Lint as: python3 +# Copyright 2018 DeepMind Technologies Limited. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+"""Tests for ferminet.train."""
+
+import os
+
+from absl import flags
+from absl.testing import parameterized
+from ferminet import train
+from ferminet.utils import system
+import numpy as np
+import pyscf
+import tensorflow.compat.v1 as tf
+
+FLAGS = flags.FLAGS
+# Default flags are sufficient so mark FLAGS as parsed so we can run the tests
+# with py.test, which imports this file rather than runs it.
+FLAGS.mark_as_parsed()
+
+
+class MolVmcTest(tf.test.TestCase, parameterized.TestCase):
+
+  def setUp(self):
+    super(MolVmcTest, self).setUp()
+    # disable use of temp directory in pyscf.
+    # Test calculations are small enough to fit in RAM and we don't need
+    # checkpoint files.
+    pyscf.lib.param.TMPDIR = None
+
+  def _get_atoms(self, name, kwargs=None):
+    configs = {
+        'h2': lambda _: system.diatomic('H', 'H', bond_length=1.4),
+        'helium': lambda _: system.atom('He'),
+        'helium_triplet': lambda _: system.atom('He', spins=(2, 0)),
+        # helium_triplet checks different numbers of electrons of each spin;
+        # hydrogen checks entirely spin-polarised systems.
+        'hydrogen': lambda _: system.atom('H'),
+        'lithium': lambda _: system.atom('Li'),
+        'hn': lambda kwargs: system.hn(r=1.4, **kwargs),
+    }
+    return configs[name](kwargs)
+
+  @parameterized.parameters(
+      {
+          'name': 'h2',
+          'use_kfac': False
+      },
+      {
+          'name': 'helium',
+          'use_kfac': False
+      },
+      {
+          'name': 'lithium',
+          'use_kfac': False
+      },
+      {
+          'name': 'helium_triplet',
+          'use_kfac': False
+      },
+      {
+          'name': 'hn',
+          'args': {'n': 3},
+          'use_kfac': False
+      },
+      {
+          'name': 'hn',
+          'args': {'n': 3},
+          'use_kfac': True
+      },
+      {
+          'name': 'hn',
+          'args': {'n': 5},
+          'use_kfac': True
+      },
+  )
+  def test_system(self,
+                  name,
+                  args=None,
+                  use_kfac=True):
+    atoms, electrons = self._get_atoms(name, args)
+    train.train(
+        atoms,
+        electrons,
+        batch_size=8,
+        network_config=train.NetworkConfig(determinants=2),
+        pretrain_config=train.PretrainConfig(iterations=20),
+        optim_config=train.OptimConfig(iterations=2, use_kfac=use_kfac),
+        mcmc_config=train.MCMCConfig(steps=2),
+        logging_config=train.LoggingConfig(
+            result_path=self.create_tempdir().full_path),
+        multi_gpu=False)
+
+  def test_restart(self):
+    atoms, electrons = self._get_atoms('lithium')
+
+    batch_size = 8
+    network_config = train.NetworkConfig(
+        determinants=2, hidden_units=((64, 16),) * 3)
+    optim_config = train.OptimConfig(iterations=10, use_kfac=False)
+    result_directory = self.create_tempdir().full_path
+
+    # First run
+    train.train(
+        atoms,
+        electrons,
+        batch_size=batch_size,
+        network_config=network_config,
+        pretrain_config=train.PretrainConfig(iterations=20),
+        optim_config=optim_config,
+        mcmc_config=train.MCMCConfig(steps=10),
+        logging_config=train.LoggingConfig(
+            result_path=os.path.join(result_directory, 'run1')),
+        multi_gpu=False)
+
+    # Restart from first run
+    # Already done pretraining and burn in, so just want to resume training
+    # immediately.
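+    # Restoring is controlled by LoggingConfig.restore_path, which points at
+    # the checkpoint directory written by the first run.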
+    tf.reset_default_graph()
+    train.train(
+        atoms,
+        electrons,
+        batch_size=batch_size,
+        network_config=network_config,
+        pretrain_config=train.PretrainConfig(iterations=0),
+        optim_config=optim_config,
+        mcmc_config=train.MCMCConfig(burn_in=0, steps=10),
+        logging_config=train.LoggingConfig(
+            result_path=os.path.join(result_directory, 'run2'),
+            restore_path=os.path.join(result_directory, 'run1', 'checkpoints')
+        ),
+        multi_gpu=False)
+
+
+class AssignElectronsTest(parameterized.TestCase):

+  def _expected_result(self, molecule, e_per_atom):
+    nelectrons = [sum(spin_electrons) for spin_electrons in zip(*e_per_atom)]
+    if nelectrons[0] == nelectrons[1]:
+      # Default to no flip if every atom has equal numbers of each spin.
+      flip = False
+      for electrons in e_per_atom:
+        if electrons[0] < electrons[1]:
+          flip = True
+          break
+        elif electrons[0] > electrons[1]:
+          flip = False
+          break
+      if flip:
+        e_per_atom = [electrons[::-1] for electrons in e_per_atom]
+    return np.concatenate(
+        [np.tile(atom.coords, e[0]) for atom, e in zip(molecule, e_per_atom)] +
+        [np.tile(atom.coords, e[1]) for atom, e in zip(molecule, e_per_atom)])
+
+  @parameterized.parameters(
+      {
+          'molecule': [('O', 0, 0, 0)],
+          'electrons': (5, 3),
+          'electrons_per_atom': ((5, 3),),
+      },
+      {
+          'molecule': [('O', 0, 0, 0), ('N', 1.2, 0, 0)],
+          'electrons': (8, 7),
+          'electrons_per_atom': ((3, 5), (5, 2)),
+      },
+      {
+          'molecule': [('O', 0, 0, 0), ('N', 1.2, 0, 0)],
+          'electrons': (7, 8),
+          'electrons_per_atom': ((5, 3), (2, 5)),
+      },
+      {
+          'molecule': [('O', 0, 0, 0), ('N', 1.2, 0, 0)],
+          'electrons': (9, 7),
+          'electrons_per_atom': ((4, 5), (5, 2)),
+      },
+      {
+          'molecule': [('O', 0, 0, 0), ('N', 1.2, 0, 0)],
+          'electrons': (10, 7),
+          'electrons_per_atom': ((5, 5), (5, 2)),
+      },
+      {
+          'molecule': [('O', 0, 0, 0), ('N', 1.2, 0, 0)],
+          'electrons': (7, 7),
+          'electrons_per_atom': ((5, 3), (2, 4)),
+      },
+      {
+          'molecule': [('O', 0, 0, 0), ('O', 1.2, 0, 0)],
+          'electrons': (9, 7),
+          'electrons_per_atom': ((4, 4), (5, 3)),
+      },
+      {
+          'molecule': [('O', 0, 0, 0), ('O', 20, 0, 0)],
+          'electrons': (10, 6),
+          'electrons_per_atom': ((5, 3), (5, 3)),
+      },
+      {
+          'molecule': [('H', 0, 0, 0), ('H', 1.2, 0, 0)],
+          'electrons': (1, 1),
+          'electrons_per_atom': ((1, 0), (0, 1)),
+      },
+      {
+          'molecule': [('B', 0, 0, 0), ('H', 1.2, 0, 0), ('H', -0.6, 0.6, 0),
+                       ('H', -0.6, -0.6, 0)],
+          'electrons': (4, 4),
+          'electrons_per_atom': ((3, 2), (0, 1), (1, 0), (0, 1)),
+      },
+      {
+          'molecule': [('B', 0, 0, 0), ('H', 1.2, 0, 0)],
+          'electrons': (3, 2),
+          'electrons_per_atom': ((3, 2), (0, 0)),
+      },
+      {
+          'molecule': [('B', 0, 0, 0), ('H', 1.2, 0, 0), ('H', -0.6, 0.6, 0)],
+          'electrons': (3, 2),
+          'electrons_per_atom': ((3, 2), (0, 0), (0, 0)),
+      },
+  )
+  def test_assign_electrons(self, molecule, electrons, electrons_per_atom):
+    molecule = [system.Atom(v[0], v[1:]) for v in molecule]
+    e_means = train.assign_electrons(molecule, electrons)
+    expected_e_means = self._expected_result(molecule, electrons_per_atom)
+    np.testing.assert_allclose(e_means, expected_e_means)
+
+    e_means = train.assign_electrons(molecule[::-1], electrons)
+    if any(atom.symbol != molecule[0].symbol for atom in molecule):
+      expected_e_means = self._expected_result(molecule[::-1],
+                                               electrons_per_atom[::-1])
+    else:
+      expected_e_means = self._expected_result(molecule[::-1],
+                                               electrons_per_atom)
+    np.testing.assert_allclose(e_means, expected_e_means)
+
+
+if __name__ == '__main__':
+  tf.test.main()
diff --git a/ferminet/tests/units_test.py b/ferminet/tests/units_test.py
new file mode 100644
index 0000000..40abbde
--- /dev/null
+++ b/ferminet/tests/units_test.py
@@ -0,0 +1,83 
@@
+# Lint as: python3
+# Copyright 2019 DeepMind Technologies Limited. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tests for ferminet.utils.units."""
+
+from absl.testing import absltest
+from ferminet.utils import units
+import numpy as np
+
+
+class UnitsTest(absltest.TestCase):
+
+  def test_angstrom2bohr(self):
+    self.assertAlmostEqual(units.angstrom2bohr(2), 3.77945225091, places=10)
+
+  def test_angstrom2bohr_numpy(self):
+    x = np.random.uniform(size=(3,))
+    x1 = units.angstrom2bohr(x)
+    x2 = np.array([units.angstrom2bohr(v) for v in x])
+    np.testing.assert_allclose(x1, x2)
+
+  def test_bohr2angstrom(self):
+    self.assertAlmostEqual(units.bohr2angstrom(2), 1.05835442134, places=10)
+
+  def test_bohr2angstrom_numpy(self):
+    x = np.random.uniform(size=(3,))
+    x1 = units.bohr2angstrom(x)
+    x2 = np.array([units.bohr2angstrom(v) for v in x])
+    np.testing.assert_allclose(x1, x2)
+
+  def test_angstrom_bohr_round_trip(self):
+    x = np.random.uniform()
+    x1 = units.bohr2angstrom(units.angstrom2bohr(x))
+    self.assertAlmostEqual(x, x1, places=10)
+
+  def test_bohr_angstrom_round_trip(self):
+    x = np.random.uniform()
+    x1 = units.angstrom2bohr(units.bohr2angstrom(x))
+    self.assertAlmostEqual(x, x1, places=10)
+
+  def test_hartree2kcal(self):
+    self.assertAlmostEqual(units.hartree2kcal(2), 1255.018948, places=10)
+
+  def test_hartree2kcal_numpy(self):
+    x = np.random.uniform(size=(3,))
+    x1 = units.hartree2kcal(x)
+    x2 = np.array([units.hartree2kcal(v) for v in x])
+    np.testing.assert_allclose(x1, x2)
+
+  def test_kcal2hartree(self):
+    self.assertAlmostEqual(units.kcal2hartree(2), 0.00318720287, places=10)
+
+  def test_kcal2hartree_numpy(self):
+    x = np.random.uniform(size=(3,))
+    x1 = units.kcal2hartree(x)
+    x2 = np.array([units.kcal2hartree(v) for v in x])
+    np.testing.assert_allclose(x1, x2)
+
+  def test_hartree_kcal_round_trip(self):
+    x = np.random.uniform()
+    x1 = units.kcal2hartree(units.hartree2kcal(x))
+    self.assertAlmostEqual(x, x1, places=10)
+
+  def test_kcal_hartree_round_trip(self):
+    x = np.random.uniform()
+    x1 = units.hartree2kcal(units.kcal2hartree(x))
+    self.assertAlmostEqual(x, x1, places=10)
+
+
+if __name__ == '__main__':
+  absltest.main()
diff --git a/ferminet/train.py b/ferminet/train.py
new file mode 100644
index 0000000..75772d2
--- /dev/null
+++ b/ferminet/train.py
@@ -0,0 +1,542 @@
+# Lint as: python3
+# Copyright 2018 DeepMind Technologies Limited. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Learn ground state wavefunctions for molecular systems using VMC.""" + +import copy +import os +from typing import Any, Mapping, Optional, Sequence, Tuple + +from absl import logging +import attr + +from ferminet import hamiltonian +from ferminet import mcmc +from ferminet import mean_corrected_kfac_opt +from ferminet import networks +from ferminet import qmc +from ferminet.utils import elements +from ferminet.utils import scf +from ferminet.utils import system + +import numpy as np +import tensorflow.compat.v1 as tf + + +def _validate_directory(obj, attribute, value): + """Validates value is a directory.""" + del obj + if value and not os.path.isdir(value): + raise ValueError(f'{attribute.name} is not a directory') + + +@attr.s(auto_attribs=True) +class LoggingConfig: + """Logging information for Fermi Nets. + + Attributes: + result_path: directory to use for saving model parameters and calculations + results. Created if does not exist. + save_frequency: frequency (in minutes) at which parameters are saved. + restore_path: directory to use for restoring model parameters. + stats_frequency: frequency (in iterations) at which statistics (via stats + hooks) are updated and stored. + replicas: the number of replicas used during training. Will be set + automatically. + walkers: If true, log walkers at every step. + wavefunction: If true, log wavefunction at every step. + local_energy: If true, log local energy at every step. + config: dictionary of additional information about the calculation setup. + Reported along with calculation statistics. + """ + result_path: str = '.' + save_frequency: float = 10 + restore_path: str = attr.ib(default=None, validator=_validate_directory) + stats_frequency: int = 1 + replicas: int = 1 + walkers: bool = False + wavefunction: bool = False + local_energy: bool = False + config: Mapping[str, Any] = attr.ib(converter=dict, + default=attr.Factory(dict)) + + +@attr.s(auto_attribs=True) +class MCMCConfig: + """Markov Chain Monte Carlo configuration for Fermi Nets. + + Attributes: + burn_in: Number of burn in steps after pretraining. + steps: 'Number of MCMC steps to make between network updates. + init_width: Width of (atom-centred) Gaussians used to generate initial + electron configurations. + move_width: Width of Gaussian used for random moves. + init_means: Iterable of 3*nelectrons giving the mean initial position of + each electron. Configurations are drawn using Gaussians of width + init_width at each 3D position. Alpha electrons are listed before beta + electrons. If empty, electrons are assigned to atoms based upon the + isolated atom spin configuration. Expert use only. + """ + burn_in: int = 100 + steps: int = 10 + init_width: float = 0.8 + move_width: float = 0.02 + init_means: Optional[Sequence[float]] = None + + +@attr.s(auto_attribs=True) +class PretrainConfig: + """Hartree-Fock pretraining algorithm configuration for Fermi Nets. + + Attributes: + iterations: Number of iterations for which to pretrain the network to match + Hartree-Fock orbitals. + basis: Basis set used to run Hartree-Fock calculation in PySCF. + """ + iterations: int = 1000 + basis: str = 'sto-3g' + + +@attr.s(auto_attribs=True) +class OptimConfig: + """Optimization configuration for Fermi Nets. + + Attributes: + iterations: Number of iterations') + clip_el: If not none, scale at which to clip local energy. + learning_rate: learning rate. + learning_rate_decay: exponent of learning rate decay. + learning_rate_delay: scale of the rate decay. 
+    use_kfac: Use the K-FAC optimizer if true, ADAM optimizer otherwise.
+    check_loss: Apply gradient update only if the loss is not NaN. If true,
+      training could be slightly slower but the checkpoint written out when a
+      NaN is detected will be with the network weights which led to the NaN.
+    deterministic: CPU only mode that also enforces determinism. Will run
+      *significantly* slower if used.
+  """
+  iterations: int = 1000000
+  learning_rate: float = 1.e-4
+  learning_rate_decay: float = 1.0
+  learning_rate_delay: float = 10000.0
+  clip_el: float = 5.0
+  use_kfac: bool = True
+  check_loss: bool = False
+  deterministic: bool = False
+
+
+@attr.s(auto_attribs=True)
+class KfacConfig:
+  """K-FAC configuration - see docs at https://github.com/tensorflow/kfac/."""
+  invert_every: int = 1
+  cov_update_every: int = 1
+  damping: float = 0.001
+  cov_ema_decay: float = 0.95
+  momentum: float = 0.0
+  momentum_type: str = attr.ib(
+      default='regular',
+      validator=attr.validators.in_(
+          ['regular', 'adam', 'qmodel', 'qmodel_fixedmu']))
+  adapt_damping: bool = False
+  damping_adaptation_decay: float = 0.9
+  damping_adaptation_interval: int = 5
+  min_damping: float = 1.e-5
+  norm_constraint: float = 0.001
+
+
+@attr.s(auto_attribs=True)
+class NetworkConfig:
+  """Network configuration for Fermi Net.
+
+  Attributes:
+    architecture: The choice of architecture to run the calculation with. Either
+      "ferminet" or "slater" for the Fermi Net and standard Slater determinant
+      respectively.
+    hidden_units: Number of hidden units in each layer of the network. If
+      the Fermi Net with one- and two-electron streams is used, a tuple is
+      provided for each layer, with the first element giving the number of
+      hidden units in the one-electron stream and the second element giving the
+      number of units in the two-electron stream. Otherwise, each layer is
+      represented by a single integer.
+    determinants: Number of determinants to use.
+    r12_en_features: Include r12/distance features between electrons and nuclei.
+      Highly recommended.
+    r12_ee_features: Include r12/distance features between pairs of electrons.
+      Highly recommended.
+    pos_ee_features: Include electron-electron position features. Highly
+      recommended.
+    use_envelope: Include multiplicative exponentially-decaying envelopes on
+      each orbital. Calculations will not converge if set to False.
+    backflow: Include backflow transformation in input coordinates.
+      Only for use if architecture == "slater". Implies build_backflow
+      is also True.
+    build_backflow: Create backflow weights but do not include backflow
+      coordinate transformation in the network. Use to train a Slater-Jastrow
+      architecture and then train a Slater-Jastrow-Backflow architecture
+      based on it in a two-stage optimization process.
+    residual: Use residual connections in network. Recommended.
+    after_det: Number of hidden units in each layer of the neural network after
+      the determinants. By default, just takes a weighted sum of
+      determinants with no nonlinearity.
+    jastrow_en: Include electron-nuclear Jastrow factor. Only relevant with
+      Slater-Jastrow-Backflow architectures.
+    jastrow_ee: Include electron-electron Jastrow factor. Only relevant with
+      Slater-Jastrow-Backflow architectures.
+    jastrow_een: Include electron-electron-nuclear Jastrow factor. Only
+      relevant with Slater-Jastrow-Backflow architectures.
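+
+    For example, the default hidden_units value, ((256, 32),) * 4, gives four
+    layers, each with 256 units in the one-electron stream and 32 units in
+    the two-electron stream.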
+ """ + architecture: str = attr.ib( + default='ferminet', validator=attr.validators.in_(['ferminet', 'slater'])) + hidden_units: Sequence[Tuple[int, int]] = ((256, 32),) * 4 + determinants: int = 16 + r12_en_features: bool = True + r12_ee_features: bool = True + pos_ee_features: bool = True + use_envelope: bool = True + backflow: bool = False + build_backflow: bool = False + residual: bool = True + after_det: Sequence[int] = (1,) + jastrow_en: bool = False + jastrow_ee: bool = False + jastrow_een: bool = False + + +def assign_electrons(molecule, electrons): + """Assigns electrons to atoms using non-interacting spin configurations. + + Args: + molecule: List of Hamiltonian.Atom objects for each atom in the system. + electrons: Pair of ints giving number of alpha (spin-up) and beta + (spin-down) electrons. + + Returns: + 1D np.ndarray of length 3N containing initial mean positions of each + electron based upon the atom positions, where N is the total number of + electrons. The first 3*electrons[0] positions correspond to the alpha + (spin-up) electrons and the next 3*electrons[1] to the beta (spin-down) + electrons. + + Raises: + RuntimeError: if a different number of electrons or different spin + polarisation is generated. + """ + # Assign electrons based upon unperturbed atoms and ignore impact of + # fractional nuclear charge. + nuclei = [int(round(atom.charge)) for atom in molecule] + total_charge = sum(nuclei) - sum(electrons) + # Construct a dummy iso-electronic neutral system. + neutral_molecule = [copy.copy(atom) for atom in molecule] + if total_charge != 0: + logging.warning( + 'Charged system. Using heuristics to set initial electron positions') + charge = 1 if total_charge > 0 else -1 + while total_charge != 0: + # Poor proxy for electronegativity. + atom_index = nuclei.index(max(nuclei) if total_charge < 0 else min(nuclei)) + atom = neutral_molecule[atom_index] + atom.charge -= charge + atom.atomic_number = int(round(atom.charge)) + if int(round(atom.charge)) == 0: + neutral_molecule.pop(atom_index) + else: + atom.symbol = elements.ATOMIC_NUMS[atom.atomic_number].symbol + total_charge -= charge + nuclei = [int(round(atom.charge)) for atom in neutral_molecule] + + spin_pol = lambda electrons: electrons[0] - electrons[1] + abs_spin_pol = abs(spin_pol(electrons)) + if len(neutral_molecule) == 1: + elecs_atom = [electrons] + else: + elecs_atom = [] + spin_pol_assigned = 0 + for ion in neutral_molecule: + # Greedily assign up and down electrons based upon the ground state spin + # configuration of an isolated atom. + atom_spin_pol = elements.ATOMIC_NUMS[ion.atomic_number].spin_config + nelec = ion.atomic_number + na = (nelec + atom_spin_pol) // 2 + nb = nelec - na + # Attempt to keep spin polarisation as close to 0 as possible. + if (spin_pol_assigned > 0 and + spin_pol_assigned + atom_spin_pol > abs_spin_pol): + elec_atom = [nb, na] + else: + elec_atom = [na, nb] + spin_pol_assigned += spin_pol(elec_atom) + elecs_atom.append(elec_atom) + + electrons_assigned = [sum(e) for e in zip(*elecs_atom)] + spin_pol_assigned = spin_pol(electrons_assigned) + if np.sign(spin_pol_assigned) == -np.sign(abs_spin_pol): + # Started with the wrong guess for spin-up vs spin-down. + elecs_atom = [e[::-1] for e in elecs_atom] + spin_pol_assigned = -spin_pol_assigned + + if spin_pol_assigned != abs_spin_pol: + logging.info('Spin polarisation does not match isolated atoms. 
'
+                 'Using heuristics to set initial electron positions.')
+    while spin_pol_assigned != abs_spin_pol:
+      atom_spin_pols = [abs(spin_pol(e)) for e in elecs_atom]
+      atom_index = atom_spin_pols.index(max(atom_spin_pols))
+      elec_atom = elecs_atom[atom_index]
+      if spin_pol_assigned < abs_spin_pol and elec_atom[0] <= elec_atom[1]:
+        elec_atom[0] += 1
+        elec_atom[1] -= 1
+        spin_pol_assigned += 2
+      elif spin_pol_assigned < abs_spin_pol and elec_atom[0] > elec_atom[1]:
+        elec_atom[0] -= 1
+        elec_atom[1] += 1
+        spin_pol_assigned += 2
+      elif spin_pol_assigned > abs_spin_pol and elec_atom[0] > elec_atom[1]:
+        elec_atom[0] -= 1
+        elec_atom[1] += 1
+        spin_pol_assigned -= 2
+      else:
+        elec_atom[0] += 1
+        elec_atom[1] -= 1
+        spin_pol_assigned -= 2
+
+  electrons_assigned = [sum(e) for e in zip(*elecs_atom)]
+  if spin_pol(electrons_assigned) == -spin_pol(electrons):
+    elecs_atom = [e[::-1] for e in elecs_atom]
+    electrons_assigned = electrons_assigned[::-1]
+
+  logging.info(
+      'Electrons assigned %s.', ', '.join([
+          '{}: {}'.format(atom.symbol, elec_atom)
+          for atom, elec_atom in zip(molecule, elecs_atom)
+      ]))
+  if any(e != e_assign for e, e_assign in zip(electrons, electrons_assigned)):
+    raise RuntimeError(
+        'Assigned incorrect number of electrons (%s instead of %s).' %
+        (electrons_assigned, electrons))
+  if any(min(ne) < 0 for ne in zip(*elecs_atom)):
+    raise RuntimeError('Assigned negative number of electrons!')
+  electron_positions = np.concatenate([
+      np.tile(atom.coords, e[0])
+      for atom, e in zip(neutral_molecule, elecs_atom)
+  ] + [
+      np.tile(atom.coords, e[1])
+      for atom, e in zip(neutral_molecule, elecs_atom)
+  ])
+  return electron_positions
+
+
+def train(molecule: Sequence[system.Atom],
+          spins: Tuple[int, int],
+          batch_size: int,
+          network_config: Optional[NetworkConfig] = None,
+          pretrain_config: Optional[PretrainConfig] = None,
+          optim_config: Optional[OptimConfig] = None,
+          kfac_config: Optional[KfacConfig] = None,
+          mcmc_config: Optional[MCMCConfig] = None,
+          logging_config: Optional[LoggingConfig] = None,
+          multi_gpu: bool = False,
+          double_precision: bool = False,
+          graph_path: Optional[str] = None):
+  """Configures and runs training loop.
+
+  Args:
+    molecule: molecule description.
+    spins: pair of ints specifying number of spin-up and spin-down electrons
+      respectively.
+    batch_size: batch size. Also referred to as the number of Markov Chain
+      Monte Carlo configurations/walkers.
+    network_config: network configuration. Default settings in NetworkConfig
+      are used if not specified.
+    pretrain_config: pretraining configuration. Default settings in
+      PretrainConfig are used if not specified.
+    optim_config: optimization configuration. Default settings in OptimConfig
+      are used if not specified.
+    kfac_config: K-FAC configuration. Default settings in KfacConfig are used
+      if not specified.
+    mcmc_config: Markov Chain Monte Carlo configuration. Default settings in
+      MCMCConfig are used if not specified.
+    logging_config: logging and checkpoint configuration. Default settings in
+      LoggingConfig are used if not specified.
+    multi_gpu: Use all available GPUs. Default: use only a single GPU.
+    double_precision: use tf.float64 instead of tf.float32 for all operations.
+      Warning - double precision is not currently functional with K-FAC.
+    graph_path: directory to save a representation of the TF graph to. Not
+      saved if not supplied.
+
+  Raises:
+    RuntimeError: if mcmc_config.init_means is supplied but is of the incorrect
+      length.
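+
+  Example, using the system-building helpers from ferminet.utils.system (a
+  minimal sketch; all other settings take their default configs):
+
+    molecule, spins = system.atom('He')
+    train(molecule, spins, batch_size=256)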
+ """ + + if not mcmc_config: + mcmc_config = MCMCConfig() + if not logging_config: + logging_config = LoggingConfig() + if not pretrain_config: + pretrain_config = PretrainConfig() + if not optim_config: + optim_config = OptimConfig() + if not kfac_config: + kfac_config = KfacConfig() + if not network_config: + network_config = NetworkConfig() + + nelectrons = sum(spins) + precision = tf.float64 if double_precision else tf.float32 + + if multi_gpu: + strategy = tf.distribute.MirroredStrategy() + else: + # Get the default (single-device) strategy. + strategy = tf.distribute.get_strategy() + if multi_gpu: + batch_size = batch_size // strategy.num_replicas_in_sync + logging.info('Setting per-GPU batch size to %s.', batch_size) + logging_config.replicas = strategy.num_replicas_in_sync + logging.info('Running on %s replicas.', strategy.num_replicas_in_sync) + + # Create a re-entrant variable scope for network. + with tf.variable_scope('model') as model: + pass + + with strategy.scope(): + with tf.variable_scope(model, auxiliary_name_scope=False) as model1: + with tf.name_scope(model1.original_name_scope): + fermi_net = networks.FermiNet( + atoms=molecule, + nelectrons=spins, + slater_dets=network_config.determinants, + hidden_units=network_config.hidden_units, + after_det=network_config.after_det, + architecture=network_config.architecture, + r12_ee_features=network_config.r12_ee_features, + r12_en_features=network_config.r12_en_features, + pos_ee_features=network_config.pos_ee_features, + build_backflow=network_config.build_backflow, + use_backflow=network_config.backflow, + jastrow_en=network_config.jastrow_en, + jastrow_ee=network_config.jastrow_ee, + jastrow_een=network_config.jastrow_een, + logdet=True, + envelope=network_config.use_envelope, + residual=network_config.residual, + pretrain_iterations=pretrain_config.iterations) + + scf_approx = scf.Scf( + molecule, + nelectrons=spins, + restricted=False, + basis=pretrain_config.basis) + if pretrain_config.iterations > 0: + scf_approx.run() + + hamiltonian_ops = hamiltonian.operators(molecule, nelectrons) + if mcmc_config.init_means: + if len(mcmc_config.init_means) != 3 * nelectrons: + raise RuntimeError('Initial electron positions of incorrect shape. ' + '({} not {})'.format( + len(mcmc_config.init_means), 3 * nelectrons)) + init_means = [float(x) for x in mcmc_config.init_means] + else: + init_means = assign_electrons(molecule, spins) + + # Build the MCMC state inside the same variable scope as the network. + with tf.variable_scope(model, auxiliary_name_scope=False) as model1: + with tf.name_scope(model1.original_name_scope): + data_gen = mcmc.MCMC( + fermi_net, + batch_size, + init_mu=init_means, + init_sigma=mcmc_config.init_width, + move_sigma=mcmc_config.move_width, + dtype=precision) + with tf.variable_scope('HF_data_gen'): + hf_data_gen = mcmc.MCMC( + scf_approx.tf_eval_slog_hartree_product, + batch_size, + init_mu=init_means, + init_sigma=mcmc_config.init_width, + move_sigma=mcmc_config.move_width, + dtype=precision) + + with tf.name_scope('learning_rate_schedule'): + global_step = tf.train.get_or_create_global_step() + lr = optim_config.learning_rate * tf.pow( + (1.0 / (1.0 + (tf.cast(global_step, tf.float32) / + optim_config.learning_rate_delay))), + optim_config.learning_rate_decay) + + if optim_config.learning_rate < 1.e-10: + logging.warning('Learning rate less than 10^-10. 
Not using an optimiser.')
+    optim_fn = lambda _: None
+    update_cached_data = None
+  elif optim_config.use_kfac:
+    cached_data = tf.get_variable(
+        'MCMC_cache',
+        initializer=tf.zeros(shape=data_gen.walkers.shape, dtype=precision),
+        use_resource=True,
+        trainable=False,
+        dtype=precision,
+    )
+    if kfac_config.adapt_damping:
+      update_cached_data = tf.assign(cached_data, data_gen.walkers)
+    else:
+      update_cached_data = None
+    optim_fn = lambda layer_collection: mean_corrected_kfac_opt.MeanCorrectedKfacOpt(  # pylint: disable=g-long-lambda
+        invert_every=kfac_config.invert_every,
+        cov_update_every=kfac_config.cov_update_every,
+        learning_rate=lr,
+        norm_constraint=kfac_config.norm_constraint,
+        damping=kfac_config.damping,
+        cov_ema_decay=kfac_config.cov_ema_decay,
+        momentum=kfac_config.momentum,
+        momentum_type=kfac_config.momentum_type,
+        loss_fn=lambda x: tf.nn.l2_loss(fermi_net(x)[0]),
+        train_batch=data_gen.walkers,
+        prev_train_batch=cached_data,
+        layer_collection=layer_collection,
+        batch_size=batch_size,
+        adapt_damping=kfac_config.adapt_damping,
+        is_chief=True,
+        damping_adaptation_decay=kfac_config.damping_adaptation_decay,
+        damping_adaptation_interval=kfac_config.damping_adaptation_interval,
+        min_damping=kfac_config.min_damping,
+        use_passed_loss=False,
+        estimation_mode='exact',
+    )
+  else:
+    adam = tf.train.AdamOptimizer(lr)
+    optim_fn = lambda _: adam
+    update_cached_data = None
+
+  qmc_net = qmc.QMC(
+      hamiltonian_ops,
+      fermi_net,
+      data_gen,
+      hf_data_gen,
+      clip_el=optim_config.clip_el,
+      check_loss=optim_config.check_loss,
+  )
+
+  qmc_net.train(
+      optim_fn,
+      optim_config.iterations,
+      logging_config,
+      using_kfac=optim_config.use_kfac,
+      strategy=strategy,
+      scf_approx=scf_approx,
+      global_step=global_step,
+      determinism_mode=optim_config.deterministic,
+      cached_data_op=update_cached_data,
+      write_graph=os.path.abspath(graph_path) if graph_path else None,
+      burn_in=mcmc_config.burn_in,
+      mcmc_steps=mcmc_config.steps,
+  )
diff --git a/ferminet/utils/analysis_tools.py b/ferminet/utils/analysis_tools.py
new file mode 100644
index 0000000..dbe318a
--- /dev/null
+++ b/ferminet/utils/analysis_tools.py
@@ -0,0 +1,114 @@
+# Lint as: python3
+# Copyright 2019 DeepMind Technologies Limited. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tools for reading and analysing QMC data."""
+
+from typing import Iterable, Optional, Text, Union
+
+from absl import logging
+import numpy as np
+import pandas as pd
+
+from pyblock import pd_utils as blocking
+
+
+def _format_network(network_option: Union[int, Iterable[int]]) -> Text:
+  """Formats a network configuration to a (short) string.
+
+  Args:
+    network_option: an integer or iterable of integers.
+
+  Returns:
+    String representation of the network option. If the network option is an
+    iterable of the form [V, V, ...], return NxV, where N is the length of the
+    iterable.
+  """
+  try:
+    # pytype doesn't handle try...except TypeError gracefully.
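+    # An integer option (e.g. 16) raises TypeError on indexing and falls
+    # through to the except branch; an iterable with identical entries, such
+    # as [32, 32, 32], is shortened to '3x32'.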
+ if all(xi == network_option[0] for xi in network_option[1:]): # pytype: disable=unsupported-operands + return '{}x{}'.format(len(network_option), network_option[0]) # pytype: disable=unsupported-operands,wrong-arg-types + else: + return '{}'.format(network_option) + except TypeError: + return '{}'.format(network_option) + + +def estimate_stats(df: pd.DataFrame, + burn_in: int, + groups: Optional[Iterable[Text]] = None, + group_by_work_unit: bool = True) -> pd.DataFrame: + """Estimates statistics for the (local) energy. + + Args: + df: pd.DataFrame containing local energy data in the 'eigenvalues' column. + burn_in: number of data points to discard before accumulating statistics to + allow for learning and equilibration time. + groups: list of column names in df to group by. The statistics for each + group are returned, along with the corresponding data for the group. The + group columns should be sufficient to distinguish between separate work + units/calculations in df. + group_by_work_unit: add 'work_unit_id' to the list of groups if not already + present and 'work_unit_id' is a column in df. This is usually helpful + for safety, when each work unit is a separate calculation and should be + treated separately statistically. + + Returns: + pandas DataFrame containing estimates of the mean, standard error and error + in the standard error from a blocking analysis of the local energy for each + group in df. + + Raises: + RuntimeError: If groups is empty or None and group_by_work_unit is False. If + df does not contain a key to group over, insert a dummy column with + identical values or use pyblock directly. + """ + wid = 'work_unit_id' + if groups is None: + groups = [] + else: + groups = list(groups) + if group_by_work_unit and wid not in groups and wid in df.columns: + groups.append(wid) + if not groups: + raise RuntimeError( + 'Must group by at least one key or set group_by_work_unit to True.') + if len(groups) == 1: + index_dict = {'index': groups[0]} + else: + index_dict = {'level_{}'.format(i): group for i, group in enumerate(groups)} + stats_dict = { + 'mean': 'energy', + 'standard error': 'stderr', + 'standard error error': 'stderrerr' + } + def block(key, values): + blocked = blocking.reblock_summary(blocking.reblock(values)[1]) + if not blocked.empty: + return blocked.iloc[0] + else: + logging.warning('Reblocking failed to estimate statistics for %s.', key) + return pd.Series({statistic: np.nan for statistic in stats_dict}) + stats = ( + pd.DataFrame.from_dict({ + n: block(n, d.eigenvalues[burn_in:]) + for n, d in df.groupby(groups) if not d[burn_in:].eigenvalues.empty + }, orient='index') + .reset_index() + .rename(index_dict, axis=1) + .rename(stats_dict, axis=1) + ) + stats = stats.sort_values(by=groups).reset_index(drop=True) + stats['burn_in'] = burn_in + return stats diff --git a/ferminet/utils/elements.py b/ferminet/utils/elements.py new file mode 100644 index 0000000..0faa914 --- /dev/null +++ b/ferminet/utils/elements.py @@ -0,0 +1,221 @@ +# Lint as: python3 +# Copyright 2019 DeepMind Technologies Limited. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Basic data on chemical elements."""
+
+import collections
+
+import attr
+
+
+@attr.s
+class Element:
+  """Chemical element.
+
+  Attributes:
+    symbol: official symbol of element.
+    atomic_number: atomic number of element.
+    period: period to which the element belongs.
+  """
+  symbol = attr.ib()
+  atomic_number = attr.ib()
+  period = attr.ib()
+
+  @property
+  def group(self):
+    """Group to which element belongs. Set to -1 for lanthanides and actinides."""
+    is_lanthanide = (58 <= self.atomic_number <= 71)
+    is_actinide = (90 <= self.atomic_number <= 103)
+    if is_lanthanide or is_actinide:
+      return -1
+    if self.symbol == 'He':
+      # n=1 shell only has s orbital -> He is a noble gas.
+      return 18
+    period_starts = (1, 3, 11, 19, 37, 55, 87)
+    period_start = period_starts[self.period - 1]
+    group_ = self.atomic_number - period_start + 1
+    # Adjust for absence of d block in periods 2 and 3.
+    if self.period < 4 and group_ > 2:
+      group_ += 10
+    # Adjust for Lanthanides and Actinides in periods 6 and 7.
+    if self.period >= 6 and group_ > 3:
+      group_ -= 14
+    return group_
+
+  @property
+  def spin_config(self):
+    """Canonical spin configuration (via Hund's rules) of neutral atom.
+
+    Returns:
+      Number of unpaired electrons (as required by PySCF) in the neutral atom's
+      ground state.
+    """
+    unpaired = {1: 1, 2: 0, 13: 1, 14: 2, 15: 3, 16: 2, 17: 1, 18: 0}
+    if self.group in unpaired:
+      return unpaired[self.group]
+    else:
+      raise NotImplementedError(
+          'Spin configuration for transition metals not set.')
+
+
+# Atomic symbols for all known elements.
+# Generated using
+# def _element(symbol, atomic_number):
+#   # period_start[n] = atomic number of group 1 element in (n+1)-th period.
+#   period_start = (1, 3, 11, 19, 37, 55, 87)
+#   for p, group1_no in enumerate(period_start):
+#     if atomic_number < group1_no:
+#       # In previous period but n is 0-based.
+#       period = p
+#       break
+#   else:
+#     period = p + 1
+#   return Element(symbol=symbol, atomic_number=atomic_number, period=period)
+# [_element(s, n+1) for n, s in enumerate(symbols)]
+# where symbols is the list of chemical symbols of all elements.
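+#
+# Example (illustrative): oxygen has atomic_number=8 and period=2, so the
+# `group` property above yields group 16, and `spin_config` then reports the
+# two unpaired 2p electrons of the neutral atom.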
+_ELEMENTS = ( + Element(symbol='H', atomic_number=1, period=1), + Element(symbol='He', atomic_number=2, period=1), + Element(symbol='Li', atomic_number=3, period=2), + Element(symbol='Be', atomic_number=4, period=2), + Element(symbol='B', atomic_number=5, period=2), + Element(symbol='C', atomic_number=6, period=2), + Element(symbol='N', atomic_number=7, period=2), + Element(symbol='O', atomic_number=8, period=2), + Element(symbol='F', atomic_number=9, period=2), + Element(symbol='Ne', atomic_number=10, period=2), + Element(symbol='Na', atomic_number=11, period=3), + Element(symbol='Mg', atomic_number=12, period=3), + Element(symbol='Al', atomic_number=13, period=3), + Element(symbol='Si', atomic_number=14, period=3), + Element(symbol='P', atomic_number=15, period=3), + Element(symbol='S', atomic_number=16, period=3), + Element(symbol='Cl', atomic_number=17, period=3), + Element(symbol='Ar', atomic_number=18, period=3), + Element(symbol='K', atomic_number=19, period=4), + Element(symbol='Ca', atomic_number=20, period=4), + Element(symbol='Sc', atomic_number=21, period=4), + Element(symbol='Ti', atomic_number=22, period=4), + Element(symbol='V', atomic_number=23, period=4), + Element(symbol='Cr', atomic_number=24, period=4), + Element(symbol='Mn', atomic_number=25, period=4), + Element(symbol='Fe', atomic_number=26, period=4), + Element(symbol='Co', atomic_number=27, period=4), + Element(symbol='Ni', atomic_number=28, period=4), + Element(symbol='Cu', atomic_number=29, period=4), + Element(symbol='Zn', atomic_number=30, period=4), + Element(symbol='Ga', atomic_number=31, period=4), + Element(symbol='Ge', atomic_number=32, period=4), + Element(symbol='As', atomic_number=33, period=4), + Element(symbol='Se', atomic_number=34, period=4), + Element(symbol='Br', atomic_number=35, period=4), + Element(symbol='Kr', atomic_number=36, period=4), + Element(symbol='Rb', atomic_number=37, period=5), + Element(symbol='Sr', atomic_number=38, period=5), + Element(symbol='Y', atomic_number=39, period=5), + Element(symbol='Zr', atomic_number=40, period=5), + Element(symbol='Nb', atomic_number=41, period=5), + Element(symbol='Mo', atomic_number=42, period=5), + Element(symbol='Tc', atomic_number=43, period=5), + Element(symbol='Ru', atomic_number=44, period=5), + Element(symbol='Rh', atomic_number=45, period=5), + Element(symbol='Pd', atomic_number=46, period=5), + Element(symbol='Ag', atomic_number=47, period=5), + Element(symbol='Cd', atomic_number=48, period=5), + Element(symbol='In', atomic_number=49, period=5), + Element(symbol='Sn', atomic_number=50, period=5), + Element(symbol='Sb', atomic_number=51, period=5), + Element(symbol='Te', atomic_number=52, period=5), + Element(symbol='I', atomic_number=53, period=5), + Element(symbol='Xe', atomic_number=54, period=5), + Element(symbol='Cs', atomic_number=55, period=6), + Element(symbol='Ba', atomic_number=56, period=6), + Element(symbol='La', atomic_number=57, period=6), + Element(symbol='Ce', atomic_number=58, period=6), + Element(symbol='Pr', atomic_number=59, period=6), + Element(symbol='Nd', atomic_number=60, period=6), + Element(symbol='Pm', atomic_number=61, period=6), + Element(symbol='Sm', atomic_number=62, period=6), + Element(symbol='Eu', atomic_number=63, period=6), + Element(symbol='Gd', atomic_number=64, period=6), + Element(symbol='Tb', atomic_number=65, period=6), + Element(symbol='Dy', atomic_number=66, period=6), + Element(symbol='Ho', atomic_number=67, period=6), + Element(symbol='Er', atomic_number=68, period=6), + Element(symbol='Tm', 
atomic_number=69, period=6), + Element(symbol='Yb', atomic_number=70, period=6), + Element(symbol='Lu', atomic_number=71, period=6), + Element(symbol='Hf', atomic_number=72, period=6), + Element(symbol='Ta', atomic_number=73, period=6), + Element(symbol='W', atomic_number=74, period=6), + Element(symbol='Re', atomic_number=75, period=6), + Element(symbol='Os', atomic_number=76, period=6), + Element(symbol='Ir', atomic_number=77, period=6), + Element(symbol='Pt', atomic_number=78, period=6), + Element(symbol='Au', atomic_number=79, period=6), + Element(symbol='Hg', atomic_number=80, period=6), + Element(symbol='Tl', atomic_number=81, period=6), + Element(symbol='Pb', atomic_number=82, period=6), + Element(symbol='Bi', atomic_number=83, period=6), + Element(symbol='Po', atomic_number=84, period=6), + Element(symbol='At', atomic_number=85, period=6), + Element(symbol='Rn', atomic_number=86, period=6), + Element(symbol='Fr', atomic_number=87, period=7), + Element(symbol='Ra', atomic_number=88, period=7), + Element(symbol='Ac', atomic_number=89, period=7), + Element(symbol='Th', atomic_number=90, period=7), + Element(symbol='Pa', atomic_number=91, period=7), + Element(symbol='U', atomic_number=92, period=7), + Element(symbol='Np', atomic_number=93, period=7), + Element(symbol='Pu', atomic_number=94, period=7), + Element(symbol='Am', atomic_number=95, period=7), + Element(symbol='Cm', atomic_number=96, period=7), + Element(symbol='Bk', atomic_number=97, period=7), + Element(symbol='Cf', atomic_number=98, period=7), + Element(symbol='Es', atomic_number=99, period=7), + Element(symbol='Fm', atomic_number=100, period=7), + Element(symbol='Md', atomic_number=101, period=7), + Element(symbol='No', atomic_number=102, period=7), + Element(symbol='Lr', atomic_number=103, period=7), + Element(symbol='Rf', atomic_number=104, period=7), + Element(symbol='Db', atomic_number=105, period=7), + Element(symbol='Sg', atomic_number=106, period=7), + Element(symbol='Bh', atomic_number=107, period=7), + Element(symbol='Hs', atomic_number=108, period=7), + Element(symbol='Mt', atomic_number=109, period=7), + Element(symbol='Ds', atomic_number=110, period=7), + Element(symbol='Rg', atomic_number=111, period=7), + Element(symbol='Cn', atomic_number=112, period=7), + Element(symbol='Nh', atomic_number=113, period=7), + Element(symbol='Fl', atomic_number=114, period=7), + Element(symbol='Mc', atomic_number=115, period=7), + Element(symbol='Lv', atomic_number=116, period=7), + Element(symbol='Ts', atomic_number=117, period=7), + Element(symbol='Og', atomic_number=118, period=7), +) + + +ATOMIC_NUMS = {element.atomic_number: element for element in _ELEMENTS} + + +# Lookup by symbol instead of atomic number. +SYMBOLS = {element.symbol: element for element in _ELEMENTS} + + +# Lookup by period. +PERIODS = collections.defaultdict(list) +for element in _ELEMENTS: + PERIODS[element.period].append(element) +PERIODS = {period: tuple(elements) for period, elements in PERIODS.items()} diff --git a/ferminet/utils/scf.py b/ferminet/utils/scf.py new file mode 100644 index 0000000..887332d --- /dev/null +++ b/ferminet/utils/scf.py @@ -0,0 +1,348 @@ +# Lint as: python3 +# Copyright 2019 DeepMind Technologies Limited. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Interaction with Hartree-Fock solver in pyscf."""
+
+# Abbreviations used:
+# SCF: self-consistent field (method). Another name for Hartree-Fock.
+# HF: Hartree-Fock method.
+# RHF: restricted Hartree-Fock. Requires the molecular orbitals for the i-th
+# alpha-spin and i-th beta-spin electrons to have the same spatial component.
+# ROHF: restricted open-shell Hartree-Fock. Same as RHF except it allows the
+# number of alpha and beta electrons to differ.
+# UHF: unrestricted Hartree-Fock. Permits breaking of spin symmetry and hence
+# alpha and beta electrons to have different spatial components.
+# AO: Atomic orbital. Underlying basis set (typically Gaussian-type orbitals and
+# built into pyscf).
+# MO: molecular orbitals/Hartree-Fock orbitals. Single-particle orbitals which
+# are solutions to the Hartree-Fock equations.
+
+
+from typing import Callable, Sequence, Text, Tuple
+
+from absl import logging
+from ferminet.utils import system
+import numpy as np
+import pyscf
+import tensorflow.compat.v1 as tf
+
+
+class Scf:
+  """Helper class for running Hartree-Fock (self-consistent field) with pyscf.
+
+  Attributes:
+    molecule: list of system.Atom objects giving the atoms in the molecule
+      and their positions.
+    nelectrons: Tuple with number of alpha electrons and beta electrons.
+    basis: Basis set to use, best specified with the relevant string for a
+      built-in basis set in pyscf. A user-defined basis set can be used
+      (advanced). See https://sunqm.github.io/pyscf/gto.html#input-basis for
+      more details.
+    restricted: If true, use the restricted Hartree-Fock method; otherwise use
+      the unrestricted Hartree-Fock method.
+  """
+
+  def __init__(self,
+               molecule: Sequence[system.Atom],
+               nelectrons: Tuple[int, int],
+               basis: Text = 'cc-pVTZ',
+               restricted: bool = True):
+    self.molecule = molecule
+    self.nelectrons = nelectrons
+    self.basis = basis
+    self.restricted = restricted
+    self._mean_field = None
+    self._mol = None
+
+    self._spin = nelectrons[0] - nelectrons[1]
+    pyscf.lib.param.TMPDIR = None
+
+  def run(self):
+    """Runs the Hartree-Fock calculation.
+
+    Returns:
+      A pyscf scf object (i.e. pyscf.scf.rhf.RHF, pyscf.scf.uhf.UHF or
+      pyscf.scf.rohf.ROHF depending on the spin and restricted settings).
+
+    Raises:
+      RuntimeError: If the number of electrons in the PySCF molecule is not
+        consistent with self.nelectrons.
+    """
+    if any(atom.atomic_number - atom.charge > 1.e-8 for atom in self.molecule):
+      logging.info(
+          'Fractional nuclear charge detected. Running SCF on atoms with integer charge.'
+      )
+    nuclear_charge = sum(atom.atomic_number for atom in self.molecule)
+    charge = nuclear_charge - sum(self.nelectrons)
+    self._mol = pyscf.gto.Mole(
+        atom=[[atom.symbol, atom.coords] for atom in self.molecule],
+        unit='bohr')
+    self._mol.basis = self.basis
+    self._mol.spin = self._spin
+    self._mol.charge = charge
+    self._mol.build()
+    if self._mol.nelectron != sum(self.nelectrons):
+      raise RuntimeError('PySCF molecule not consistent with QMC molecule.')
+    if self.restricted:
+      self._mean_field = pyscf.scf.RHF(self._mol)
+    else:
+      self._mean_field = pyscf.scf.UHF(self._mol)
+    self._mean_field.init_guess = 'atom'
+    self._mean_field.kernel()
+    return self._mean_field
+
+  def eval_mos(self, positions: np.ndarray,
+               deriv: bool = False) -> Tuple[np.ndarray, np.ndarray]:
+    """Evaluates the Hartree-Fock single-particle orbitals at a set of points.
+
+    Args:
+      positions: numpy array of shape (N, 3) of the positions in space at which
+        to evaluate the Hartree-Fock orbitals.
+      deriv: If True, also calculate the first derivatives of the
+        single-particle orbitals.
+
+    Returns:
+      Pair of numpy float64 arrays of shape (N, M) (deriv=False) or (4, N, M)
+      (deriv=True), where 2M is the number of Hartree-Fock orbitals. The (i-th,
+      j-th) element in the first (second) array gives the value of the j-th
+      alpha (beta) Hartree-Fock orbital at the i-th electron position in
+      positions. For restricted (RHF, ROHF) calculations, the two arrays are
+      identical.
+      If deriv=True, the first index contains [value, x derivative, y
+      derivative, z derivative].
+
+    Raises:
+      RuntimeError: If the Hartree-Fock calculation has not been performed
+        using `run`.
+      NotImplementedError: If the Hartree-Fock calculation used Cartesian
+        Gaussian-type orbitals as the underlying basis set.
+    """
+    if self._mean_field is None:
+      raise RuntimeError('Mean-field calculation has not been run.')
+    if self.restricted:
+      coeffs = (self._mean_field.mo_coeff,)
+    else:
+      coeffs = self._mean_field.mo_coeff
+    # Assumes self._mol.cart (use of Cartesian Gaussian-type orbitals and
+    # integrals) is False (default behaviour of pyscf).
+    if self._mol.cart:
+      raise NotImplementedError(
+          'Evaluation of molecular orbitals using cartesian GTOs.')
+    # Note sph refers to the use of spherical GTO basis sets rather than
+    # Cartesian GTO basis sets. The coordinate system used for the electron
+    # positions is Cartesian in both cases.
+    gto_op = 'GTOval_sph_deriv1' if deriv else 'GTOval_sph'
+    ao_values = self._mol.eval_gto(gto_op, positions)
+    mo_values = tuple(np.matmul(ao_values, coeff) for coeff in coeffs)
+    if self.restricted:
+      # Duplicate for beta electrons.
+      mo_values *= 2
+    return mo_values
+
+  def tf_eval_mos(self, positions: tf.Tensor,
+                  deriv: bool = False) -> tf.Tensor:
+    """Evaluates the Hartree-Fock single-particle orbitals as a tensorflow op.
+
+    See: `eval_mos` for evaluating the orbitals given positions as numpy arrays.
+
+    Args:
+      positions: Tensor of shape (N, 3) containing N 3D position vectors at
+        which the Hartree-Fock orbitals are evaluated.
+      deriv: If True, also evaluates the first derivatives of the orbitals with
+        respect to positions and makes them available via tf.gradients.
+
+    Returns:
+      Tensor of shape (N, 2M) and the same dtype as positions. The first
+      (second) M elements along the last axis give the value of the alpha-spin
+      (beta-spin) Hartree-Fock orbitals at the desired positions.
+
+    Raises:
+      RuntimeError: If the first derivatives are requested (via tf.gradients)
+        but deriv is False.
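+
+    Example (illustrative; assumes `run` has already been called on an Scf
+    instance named scf_approx, as in train.py above):
+      positions = tf.placeholder(tf.float64, shape=(None, 3))
+      mo_values = scf_approx.tf_eval_mos(positions)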
+    """
+
+    # Evaluate MOs in a nested function to avoid issues with custom_gradients
+    # and self or with the deriv flag argument.
+
+    @tf.custom_gradient
+    def _eval_mos(positions: tf.Tensor
+                 ) -> Tuple[tf.Tensor, Callable[[tf.Tensor], tf.Tensor]]:
+      """Computes Hartree-Fock orbitals at given positions."""
+      mo_values_derivs = tf.py_func(self.eval_mos, [positions, deriv],
+                                    (tf.float64, tf.float64))
+      mo_values_derivs = [
+          tf.cast(vals, positions.dtype) for vals in mo_values_derivs
+      ]
+      if deriv:
+        mo_values = tf.concat([vals[0] for vals in mo_values_derivs], axis=-1)
+        mo_derivs = tf.concat([vals[1:] for vals in mo_values_derivs], axis=-1)
+      else:
+        mo_values = tf.concat(mo_values_derivs, axis=-1)
+        mo_derivs = None
+      def grad(dmo: tf.Tensor) -> tf.Tensor:
+        """Computes gradients of orbitals with respect to positions."""
+        # dmo, the reverse sensitivity, is a (N, 2M) tensor.
+        # mo_derivs is the full Jacobian, (3, N, 2M), where mo_derivs[i,j,k] is
+        # d\phi_{k} / dr^j_i (derivative of the k-th orbital with respect to the
+        # i-th component of the j-th position vector).
+        # positions is a (N, 3) tensor => tf.gradients(mo, positions) is (N, 3).
+        # tensorflow gradients use summation over free indices (i.e. orbitals).
+        if mo_derivs is None:
+          raise RuntimeError(
+              'Gradients not computed in forward pass. Set deriv=True.')
+        g = tf.reduce_sum(mo_derivs * tf.expand_dims(dmo, 0), axis=-1)
+        return tf.linalg.transpose(g)
+      return mo_values, grad
+
+    return _eval_mos(positions)
+
+  def tf_eval_hf(self, positions: tf.Tensor,
+                 deriv: bool = False) -> Tuple[tf.Tensor, tf.Tensor]:
+    """Evaluates Hartree-Fock occupied orbitals at a set of electron positions.
+
+    Note: pyscf evaluates all orbitals at each position, which is somewhat
+    wasteful for this use-case, where we only need occupied orbitals and further
+    factorise the wavefunction into alpha and beta parts.
+
+    Args:
+      positions: (N, nelectrons, 3) tensor. Batch of positions for each
+        electron. The first self.nelectrons[0] elements in the second axis refer
+        to alpha electrons, the next self.nelectrons[1] positions refer to the
+        beta electrons.
+      deriv: If True, also evaluates the first derivatives of the orbitals with
+        respect to positions and makes them available via tf.gradients.
+
+    Returns:
+      Tuple of tensors for alpha and beta electrons respectively of shape
+      (N, nspin, nspin), where nspin is the corresponding number of electrons of
+      that spin. The (i, j, k) element gives the value of the k-th orbital for
+      the j-th electron in batch i. The product of the determinants of the
+      tensors hence gives the Hartree-Fock determinant at the set of input
+      positions.
+
+    Raises:
+      RuntimeError: If the number of electron positions does not match
+        sum(self.nelectrons).
+    """
+    if sum(self.nelectrons) != positions.shape.as_list()[1]:
+      raise RuntimeError(
+          'positions does not contain correct number of electrons.')
+    # Evaluate MOs at all electron positions.
+    # pyscf evaluates all 2M MOs at each 3D position vector. Hence reshape into
+    # (N*nelectron, 3) tensor for pyscf input.
+    one_e_positions = tf.reshape(positions, (-1, positions.shape[-1]))
+    mos = self.tf_eval_mos(one_e_positions, deriv)
+    nbasis = tf.shape(mos)[-1] // 2
+    # Undo reshaping of electron positions to give (N, nelectrons, 2M), where
+    # the (i, j, k) element gives the value of the k-th MO at the j-th electron
+    # position in the i-th batch.
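+    # Shape illustration (hypothetical sizes): with nelectrons=(3, 2) and 10
+    # MOs per spin channel, mos above has shape (5N, 20) and is reshaped below
+    # to (N, 5, 20).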
+    mos = tf.reshape(mos, (-1, sum(self.nelectrons), 2*nbasis))
+    # Return (using the Aufbau principle) the matrices for the occupied alpha
+    # and beta orbitals. Number of alpha electrons given by self.nelectrons[0].
+    alpha_spin = mos[:, :self.nelectrons[0], :self.nelectrons[0]]
+    beta_spin = mos[:, self.nelectrons[0]:, nbasis:nbasis+self.nelectrons[1]]
+    return alpha_spin, beta_spin
+
+  def tf_eval_slog_slater_determinant(self, flat_positions: tf.Tensor
+                                     ) -> Tuple[tf.Tensor, tf.Tensor]:
+    """Evaluates the signed log of the HF Slater determinant at flat_positions.
+
+    Interface is chosen to be similar to that used for QMC networks.
+
+    Args:
+      flat_positions: (N, total_electrons * 3) tensor. Batch of flattened
+        positions for each electron, reshapeable to (N, total_electrons, 3)
+        where the first self.nelectrons[0] elements in the second axis refer to
+        alpha electrons, the next self.nelectrons[1] positions refer to the beta
+        electrons.
+
+    Returns:
+      log_abs_slater_determinant: (N, 1) tensor. Log of the absolute value of
+        the Slater determinant.
+      sign: (N, 1) tensor. Sign of the wavefunction.
+
+    Raises:
+      RuntimeError: on incorrect shape of flat_positions.
+    """
+    xs = tf.reshape(flat_positions,
+                    [flat_positions.shape[0],
+                     sum(self.nelectrons), -1])
+    if xs.shape[2] != 3:
+      msg = 'flat_positions must be of shape (N, total_electrons*3)'
+      raise RuntimeError(msg)
+    matrices = self.tf_eval_hf(xs)
+    slogdets = [tf.linalg.slogdet(elem) for elem in matrices]
+    sign_alpha, sign_beta = [elem[0] for elem in slogdets]
+    log_abs_wf_alpha, log_abs_wf_beta = [elem[1] for elem in slogdets]
+    log_abs_slater_determinant = tf.expand_dims(tf.math.add(log_abs_wf_alpha,
+                                                            log_abs_wf_beta), 1)
+    sign = tf.expand_dims(tf.math.multiply(sign_alpha, sign_beta), 1)
+    return log_abs_slater_determinant, sign
+
+  def tf_eval_slog_hartree_product(self, flat_positions: tf.Tensor,
+                                   deriv: bool = False
+                                  ) -> Tuple[tf.Tensor, tf.Tensor]:
+    """Evaluates the signed log Hartree product without anti-symmetrization.
+
+    Interface is chosen to be similar to that used for QMC networks.
+
+    Args:
+      flat_positions: (N, total_electrons * 3) tensor. Batch of flattened
+        positions for each electron, reshapeable to (N, total_electrons, 3)
+        where the first self.nelectrons[0] elements in the second axis refer to
+        alpha electrons, the next self.nelectrons[1] positions refer to the beta
+        electrons.
+      deriv: specifies if we will need derivatives.
+
+    Returns:
+      log_abs_hartree_product: (N, 1) tensor. Log of the absolute value of the
+        Hartree product.
+      sign: (N, 1) tensor. Sign of the Hartree product.
+    """
+    xs = tf.reshape(flat_positions,
+                    [int(flat_positions.shape[0]),
+                     sum(self.nelectrons), -1])
+    matrices = self.tf_eval_hf(xs, deriv)
+    log_abs_alpha, log_abs_beta = [
+        tf.linalg.trace(tf.log(tf.abs(elem))) for elem in matrices
+    ]
+    sign_alpha, sign_beta = [
+        tf.reduce_prod(tf.sign(tf.linalg.diag_part(elem)), axis=1)
+        for elem in matrices
+    ]
+    log_abs_hartree_product = tf.expand_dims(
+        tf.math.add(log_abs_alpha, log_abs_beta), axis=1)
+    sign = tf.expand_dims(tf.math.multiply(sign_alpha, sign_beta), axis=1)
+    return log_abs_hartree_product, sign
+
+  def tf_eval_hartree_product(self, flat_positions: tf.Tensor,
+                              deriv: bool = False) -> tf.Tensor:
+    """Evaluates the Hartree product without anti-symmetrization.
+
+    Interface is chosen to be similar to that used for QMC networks. This is
+    a convenience wrapper around tf_eval_slog_hartree_product and is hence not
+    optimised.
+
+    Args:
+      flat_positions: (N, total_electrons * 3) tensor. 
Batch of flattened + positions for each electron, reshapeable to (N, total_electrons, 3) + where the first self.nelectrons[0] elements in the second axis refer to + alpha electrons, the next self.nelectrons[1] positions refer to the beta + electrons. + deriv: specifies if we will need derivatives. + + Returns: + (N,1) tensor containing the Hartree product. + """ + log_abs, sign = self.tf_eval_slog_hartree_product(flat_positions, deriv) + return tf.exp(log_abs) * sign diff --git a/ferminet/utils/system.py b/ferminet/utils/system.py new file mode 100644 index 0000000..1ea512a --- /dev/null +++ b/ferminet/utils/system.py @@ -0,0 +1,265 @@ +# Lint as: python3 +# Copyright 2020 DeepMind Technologies Limited. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Functions to create different kinds of systems.""" + +from typing import Sequence +import attr +from ferminet.utils import elements +from ferminet.utils import units as unit_conversion +import numpy as np + + +# Default bond lengths in angstrom for some diatomics. +# Bond lengths from either the G3 dataset: +# 1. http://www.cse.anl.gov/OldCHMwebsiteContent/compmat/comptherm.htm +# 2. L. A. Curtiss, P. C. Redfern, K. Raghavachari, and J. A. Pople, +# J. Chem. Phys, 109, 42 (1998). +# or from NIST (https://cccbdb.nist.gov/diatomicexpbondx.asp). +diatomic_bond_lengths = { + 'BeH': 1.348263, + 'CN': 1.134797, + 'ClF': 1.659091, + 'F2': 1.420604, + 'H2': 0.737164, + 'HCl': 1.2799799, + 'Li2': 2.77306, + 'LiH': 1.639999, + 'N2': 1.129978, + 'NH': 1.039428, + 'CO': 1.150338, + 'BH': 1.2324, + 'PN': 1.491, + 'AlH': 1.648, + 'AlN': 1.786, +} + + +# Default spin polarisation for a few diatomics of interest. +# Otherwise default to either singlet (doublet) for even (odd) numbers of +# electrons. Units: number of unpaired electrons. +diatomic_spin_polarisation = { + 'B2': 2, + 'O2': 2, + 'NH': 2, + 'AlN': 2, +} + + +@attr.s +class Atom: # pytype: disable=invalid-function-definition + """Atom information for Hamiltonians. + + The nuclear charge is inferred from the symbol if not given, in which case the + symbol must be the IUPAC symbol of the desired element. + + Attributes: + symbol: Element symbol. + coords: An iterable of atomic coordinates. Always a list of floats and in + bohr after initialisation. Default: place atom at origin. + charge: Nuclear charge. Default: nuclear charge (atomic number) of atom of + the given name. + atomic_number: Atomic number associated with element. Default: atomic number + of element of the given symbol. Should match charge unless fractional + nuclear charges are being used. + units: String giving units of coords. Either bohr or angstrom. Default: + bohr. If angstrom, coords are converted to be in bohr and units to the + string 'bohr'. + coords_angstrom: list of atomic coordinates in angstrom. + coords_array: Numpy array of atomic coordinates in bohr. 
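+
+  Illustrative example: Atom(symbol='H', coords=(0.0, 0.0, 0.7),
+  units='angstrom') stores its coordinates as approximately
+  (0.0, 0.0, 1.3228) and reports units as 'bohr' after initialisation.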
+  """
+  symbol = attr.ib()
+  coords = attr.ib(
+      converter=lambda xs: tuple(float(x) for x in xs),
+      default=(0.0, 0.0, 0.0))  # type: Sequence[float]
+  charge = attr.ib(converter=float)
+  atomic_number = attr.ib(converter=int)
+  units = attr.ib(
+      default='bohr', validator=attr.validators.in_(['bohr', 'angstrom']))
+
+  @charge.default
+  def _set_default_charge(self):
+    return elements.SYMBOLS[self.symbol].atomic_number
+
+  @atomic_number.default
+  def _set_default_atomic_number(self):
+    return elements.SYMBOLS[self.symbol].atomic_number
+
+  def __attrs_post_init__(self):
+    if self.units == 'angstrom':
+      self.coords = [unit_conversion.angstrom2bohr(x) for x in self.coords]
+      self.units = 'bohr'
+
+  @property
+  def coords_angstrom(self):
+    return [unit_conversion.bohr2angstrom(x) for x in self.coords]
+
+  @property
+  def coords_array(self):
+    if not hasattr(self, '_coords_arr'):
+      self._coords_arr = np.array(self.coords)
+    return self._coords_arr
+
+
+def atom(symbol, spins=None, charge=0):
+  """Return configuration for a single atom.
+
+  Args:
+    symbol: The atomic symbol from the periodic table.
+    spins (optional): A tuple with the number of spin-up and spin-down
+      electrons.
+    charge (optional): If zero (default), create a neutral atom, otherwise
+      create an anion if charge is negative or a cation if charge is positive.
+
+  Returns:
+    A list with a single Atom object located at zero, and a tuple with the spin
+    configuration of the electrons.
+  """
+  atomic_number = elements.SYMBOLS[symbol].atomic_number
+  if charge > atomic_number:
+    raise ValueError('Cannot have a cation with charge larger than the '
+                     'atomic number. Charge: {}, Atomic Number: {}'.format(
+                         charge, atomic_number))
+  if spins is None:
+    nelec = atomic_number - charge
+    spin_polarisation = elements.ATOMIC_NUMS[nelec].spin_config
+    # Distribute the ion's nelec electrons over the two spin channels so that
+    # nalpha - nbeta = spin_polarisation.
+    nalpha = (nelec + spin_polarisation) // 2
+    spins = (nalpha, nelec - nalpha)
+  return [Atom(symbol=symbol, coords=(0.0, 0.0, 0.0))], spins
+
+
+def diatomic(symbol1, symbol2, bond_length, spins=None, charge=0, units='bohr'):
+  """Return configuration for a diatomic molecule."""
+  if spins is None:
+    atomic_number_1 = elements.SYMBOLS[symbol1].atomic_number
+    atomic_number_2 = elements.SYMBOLS[symbol2].atomic_number
+    total_charge = atomic_number_1 + atomic_number_2 - charge
+    if total_charge % 2 == 0:
+      spins = (total_charge // 2, total_charge // 2)
+    else:
+      spins = ((total_charge + 1) // 2, (total_charge - 1) // 2)
+
+  return [
+      Atom(symbol=symbol1, coords=(0.0, 0.0, bond_length/2.0), units=units),
+      Atom(symbol=symbol2, coords=(0.0, 0.0, -bond_length/2.0), units=units)
+  ], spins
+
+
+def molecule(symbol, bond_length=0.0, units='bohr'):
+  """Hardcoded molecular geometries from the original Fermi Net paper."""
+
+  if symbol in diatomic_bond_lengths:
+    if symbol[-1] == '2':
+      symbs = [symbol[:-1], symbol[:-1]]
+    else:  # Split a camel-case string on the second capital letter.
+      split_idx = None
+      for i in range(1, len(symbol)):
+        if split_idx is None and symbol[i].isupper():
+          split_idx = i
+      if split_idx is None:
+        raise ValueError('Cannot find second atomic symbol: {}'.format(symbol))
+      symbs = [symbol[:split_idx], symbol[split_idx:]]
+
+    atomic_number_1 = elements.SYMBOLS[symbs[0]].atomic_number
+    atomic_number_2 = elements.SYMBOLS[symbs[1]].atomic_number
+    total_charge = atomic_number_1 + atomic_number_2
+    if symbol in diatomic_spin_polarisation:
+      spin_pol = diatomic_spin_polarisation[symbol]
+      # Split the polarisation across the spin channels so the electron count
+      # is preserved and nalpha - nbeta = spin_pol.
+      spins = ((total_charge + spin_pol) // 2, (total_charge - spin_pol) // 2)
+    elif total_charge % 2 == 
0: + spins = (total_charge // 2, total_charge // 2) + else: + spins = ((total_charge + 1)// 2, (total_charge - 1) // 2) + + if bond_length == 0.0: + bond_length = diatomic_bond_lengths[symbol] + units = 'angstrom' + return diatomic(symbs[0], symbs[1], + bond_length, + units=units, + spins=spins) + + if bond_length != 0.0: + raise ValueError('Bond length argument only appropriate for diatomics.') + + if symbol == 'CH4': + return [ + Atom(symbol='C', coords=(0.0, 0.0, 0.0), units='bohr'), + Atom(symbol='H', coords=(1.18886, 1.18886, 1.18886), units='bohr'), + Atom(symbol='H', coords=(-1.18886, -1.18886, 1.18886), units='bohr'), + Atom(symbol='H', coords=(1.18886, -1.18886, -1.18886), units='bohr'), + Atom(symbol='H', coords=(-1.18886, 1.18886, -1.18886), units='bohr'), + ], (5, 5) + + if symbol == 'NH3': + return [ + Atom(symbol='N', coords=(0.0, 0.0, 0.22013), units='bohr'), + Atom(symbol='H', coords=(0.0, 1.77583, -0.51364), units='bohr'), + Atom(symbol='H', coords=(1.53791, -0.88791, -0.51364), units='bohr'), + Atom(symbol='H', coords=(-1.53791, -0.88791, -0.51364), units='bohr'), + ], (5, 5) + + if symbol == 'C2H4' or symbol == 'ethene' or symbol == 'ethylene': + return [ + Atom(symbol='C', coords=(0.0, 0.0, 1.26135), units='bohr'), + Atom(symbol='C', coords=(0.0, 0.0, -1.26135), units='bohr'), + Atom(symbol='H', coords=(0.0, 1.74390, 2.33889), units='bohr'), + Atom(symbol='H', coords=(0.0, -1.74390, 2.33889), units='bohr'), + Atom(symbol='H', coords=(0.0, 1.74390, -2.33889), units='bohr'), + Atom(symbol='H', coords=(0.0, -1.74390, -2.33889), units='bohr'), + ], (8, 8) + + if symbol == 'C4H6' or symbol == 'bicyclobutane': + return [ + Atom(symbol='C', coords=(0.0, 2.13792, 0.58661), units='bohr'), + Atom(symbol='C', coords=(0.0, -2.13792, 0.58661), units='bohr'), + Atom(symbol='C', coords=(1.41342, 0.0, -0.58924), units='bohr'), + Atom(symbol='C', coords=(-1.41342, 0.0, -0.58924), units='bohr'), + Atom(symbol='H', coords=(0.0, 2.33765, 2.64110), units='bohr'), + Atom(symbol='H', coords=(0.0, 3.92566, -0.43023), units='bohr'), + Atom(symbol='H', coords=(0.0, -2.33765, 2.64110), units='bohr'), + Atom(symbol='H', coords=(0.0, -3.92566, -0.43023), units='bohr'), + Atom(symbol='H', coords=(2.67285, 0.0, -2.19514), units='bohr'), + Atom(symbol='H', coords=(-2.67285, 0.0, -2.19514), units='bohr'), + ], (15, 15) + + raise ValueError('Not a recognized molecule: {}'.format(symbol)) + + +def hn(n, r, charge=0, units='bohr'): + """Return a hydrogen chain with n atoms and separation r.""" + m = n - charge # number of electrons + if m % 2 == 0: + spins = (m//2, m//2) + else: + spins = ((m+1)//2, (m-1)//2) + lim = r * (n-1) / 2.0 + return [Atom(symbol='H', coords=(0.0, 0.0, z), units=units) + for z in np.linspace(-lim, lim, n)], spins + + +def h4_circle(r, theta, units='bohr'): + """Return 4 hydrogen atoms arranged in a circle, a failure case of CCSD(T).""" + return [ + Atom(symbol='H', + coords=(r*np.cos(theta), r*np.sin(theta), 0.0), + units=units), + Atom(symbol='H', + coords=(-r*np.cos(theta), r*np.sin(theta), 0.0), + units=units), + Atom(symbol='H', + coords=(r*np.cos(theta), -r*np.sin(theta), 0.0), + units=units), + Atom(symbol='H', + coords=(-r*np.cos(theta), -r*np.sin(theta), 0.0), + units=units) + ], (2, 2) diff --git a/ferminet/utils/units.py b/ferminet/utils/units.py new file mode 100644 index 0000000..8d441f2 --- /dev/null +++ b/ferminet/utils/units.py @@ -0,0 +1,46 @@ +# Lint as: python3 +# Copyright 2019 DeepMind Technologies Limited. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Basic definition of units and converters useful for chemistry.""" + +from typing import Union +import numpy as np + +# 1 Bohr = 0.52917721067 (12) x 10^{-10} m +# https://physics.nist.gov/cgi-bin/cuu/Value?bohrrada0 +# Note: pyscf uses a slightly older definition of 0.52917721092 angstrom. +ANGSTROM_BOHR = 0.52917721067 +BOHR_ANGSTROM = 1. / ANGSTROM_BOHR + +# 1 Hartree = 627.509474 kcal/mol +# https://en.wikipedia.org/wiki/Hartree +KCAL_HARTREE = 627.509474 +HARTREE_KCAL = 1. / KCAL_HARTREE + + +def bohr2angstrom(x_b: Union[float, np.ndarray]) -> Union[float, np.ndarray]: + return x_b * ANGSTROM_BOHR + + +def angstrom2bohr(x_a: Union[float, np.ndarray]) -> Union[float, np.ndarray]: + return x_a * BOHR_ANGSTROM + + +def hartree2kcal(x_b: Union[float, np.ndarray]) -> Union[float, np.ndarray]: + return x_b * KCAL_HARTREE + + +def kcal2hartree(x_a: Union[float, np.ndarray]) -> Union[float, np.ndarray]: + return x_a * HARTREE_KCAL diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..da8ef52 --- /dev/null +++ b/setup.py @@ -0,0 +1,52 @@ +# Copyright 2018-2019 DeepMind Technologies Limited and Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Setup for pip package.""" + +import unittest +from setuptools import find_packages +from setuptools import setup + +REQUIRED_PACKAGES = [ + 'absl-py', 'kfac>=0.2.3', 'numpy<1.19.0', 'pandas', 'pyscf', 'pyblock', + 'dm-sonnet<2.0', 'tables', 'tensorflow>=1.14,<2.0', + 'tensorflow_probability==0.8' +] +EXTRA_PACKAGES = { + 'tensorflow-gpu': ['tensorflow-gpu>=1.14,<2.0'], +} + + +def ferminet_test_suite(): + test_loader = unittest.TestLoader() + test_suite = test_loader.discover('ferminet/tests', pattern='*_test.py') + return test_suite + + +setup( + name='ferminet', + version='0.1', + description='A library to train networks to represent ground state wavefunctions of fermionic systems', + url='https://github.com/deepmind/ferminet', + author='DeepMind', + author_email='no-reply@google.com', + # Contained modules and scripts. + scripts=['bin/ferminet'], + packages=find_packages(), + install_requires=REQUIRED_PACKAGES, + extras_require=EXTRA_PACKAGES, + platforms=['any'], + license='Apache 2.0', + test_suite='setup.ferminet_test_suite', +)
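
A minimal usage sketch of the utilities added in this patch, assuming the
package and the TF1-era dependencies listed in setup.py are installed (the
`system.molecule` and unit-conversion helpers are as defined above):

    from ferminet.utils import system, units

    # N2 at its default bond length (1.129978 angstrom, stored in bohr).
    atoms, spins = system.molecule('N2')
    assert spins == (7, 7)  # 14 electrons, spin-unpolarised
    print(atoms[0].coords_angstrom)

    # The unit helpers round-trip consistently.
    assert abs(units.bohr2angstrom(units.angstrom2bohr(1.0)) - 1.0) < 1e-12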