v0.1.0: First pre-alpha release of MALA.

mala-project · Mar 23, 2021 · 544c1bf · 544c1bf
2 parents f206204 + 66026f8
commit 544c1bf
Show file tree

Hide file tree

Showing 89 changed files with 2,472 additions and 1,207 deletions.
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
@@ -1,44 +1,45 @@
 default:
-  image: registry.hzdr.de/multiscale-wdm/surrogate-models/fesl/fesl
+  image: $CI_REGISTRY_IMAGE:latest
 
 # Change pip's cache and conda's package directories to be inside the project
 # directory since we can only cache local items.
 variables:
   CONDA_PKGS_DIRS: "${CI_PROJECT_DIR}/conda/pkgs"
   PIP_CACHE_DIR: "${CI_PROJECT_DIR}/.cache/pip"
 
-cache:
-  paths:
-    - conda/pkgs
-    - .cache/pip
-  policy: pull
-
 stages:
+  - build
   - setup
   - test
   - deploy
 
 .env_setup: &env_setup
-  - conda env create -q -f install/fesl_cpu_environment.yml
-  - conda clean --tarballs --yes
-  - source activate fesl-cpu
   - pip install -q -e .
 
 .data_setup: &data_setup
   - cd ..
-  - git clone https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.hzdr.de/multiscale-wdm/surrogate-models/fesl/data  fesl_data_repo
-  - cd fesl_data_repo
+  - git clone -b v0.1.2  https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.com/hzdr/mala/data  mala_data_repo
+  - cd mala_data_repo
   - git lfs install
-  - bash ../fesl/install/data_repo_link/link_data_repo.sh `pwd`
-  - cd ../fesl
+  - bash ../mala/install/data_repo_link/link_data_repo.sh `pwd`
+  - cd ../mala
+
+build-image:
+  stage: build
+  image: docker:20.10
+  services:
+    - docker:20.10-dind
+  before_script:
+    - docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $CI_REGISTRY
+  script:
+    - docker pull $CI_REGISTRY_IMAGE:latest || true
+    - docker build --cache-from $CI_REGISTRY_IMAGE:latest --tag $CI_REGISTRY_IMAGE:latest .
+    - docker push $CI_REGISTRY_IMAGE:latest
+  tags:
+    - dind
 
-setup-fesl:
+setup-mala:
   stage: setup
-  cache:
-    paths:
-      - conda/pkgs
-      - .cache/pip
-    policy: pull-push
   before_script:
     - *env_setup
     - *data_setup
@@ -54,22 +55,23 @@ test-basic-functions:
     - *data_setup
   script:
     - cd test
-    - python fesl_tests.py
-  needs: [setup-fesl]
+    - python mala_tests.py
+  needs: [setup-mala]
 
 test-workflow:
   stage: test
+  tags:
+    - performance
   before_script:
     - *env_setup
     - *data_setup
   script:
     - cd examples
     - python ex99_verify_all_examples.py
     - cd ..
-  needs: [setup-fesl]
+  needs: [setup-mala]
 
 #### documentation
-
 setup-docs:
   stage: setup
   image: python:3.7
@@ -97,7 +99,7 @@ test-docstrings:
     - pip install -qU pip
     - pip install -qU pydocstyle
   script:
-    - pydocstyle --convention=numpy fesl
+    - pydocstyle --convention=numpy mala
   needs: [setup-docs]
 
 pages:
@@ -115,7 +117,7 @@ pages:
   - pip install -qU recommonmark
   - pip install -qU sphinx-markdown-tables
   script:
-  - sphinx-apidoc -o docs/source/api fesl
+  - sphinx-apidoc -o docs/source/api mala
   - sphinx-build -b html -d docs/_build/doctrees docs/source docs/_build/html
   after_script:
   - mv docs/_build/html public

diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,14 @@
+FROM continuumio/miniconda3:4.9.2
+
+# Update the image to the latest packages
+RUN apt-get update && apt-get upgrade -y
+
+RUN apt-get install --no-install-recommends -y build-essential \
+                                               libz-dev swig git-lfs && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+
+COPY install/mala_cpu_environment.yml .
+RUN conda env create -q -f mala_cpu_environment.yml && rm -rf /opt/conda/pkgs/*
+
+RUN echo "source activate mala-cpu" > ~/.bashrc
+ENV PATH /opt/conda/envs/mala-cpu/bin:$PATH
diff --git a/README.rst b/README.rst
@@ -1,7 +1,7 @@
-FESL
+MALA
 ====
 
-FESL (Framework for Electronic Structure Learning) is a machine-learning
+MALA tools (Materials Analysis and Learning) is a machine-learning
 based framework to enable multiscale modeling by bypassing
 computationally expensive density functional simulations. It is designed
 as a python package. This repository is structured as follows:
@@ -10,15 +10,15 @@ as a python package. This repository is structured as follows:
 
    ├── examples : contains useful examples to get you started with the package
    ├── install : contains scripts for setting up this package on your machine
-   ├── fesl : the source code itself
+   ├── mala : the source code itself
    ├── test : test scripts used during development, will hold tests for CI in the future
    └── docs : Sphinx documentation folder
 
 
 Installation
 ------------
 
-Please refer to :doc:`Installation of FESL <install/README>`.
+Please refer to `Installation of MALA <https://multiscale-wdm.pages.hzdr.de/surrogate-models/fesl/fesl/install/README.html>`_.
 
 Running
 -------

diff --git a/docs/Makefile b/docs/Makefile
@@ -20,4 +20,4 @@ help:
 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
 
 apidocs:
-	sphinx-apidoc -f -o source/api ../fesl
+	sphinx-apidoc -f -o source/api ../mala
diff --git a/docs/source/CONTRIBUTE.md b/docs/source/CONTRIBUTE.md
@@ -1,16 +1,52 @@
-## Contributing to FESL
+## Contributing to MALA
+
+MALA is open source software and as such always welcomes additions and 
+improvements. However, we kindly ask any contributor to adhere to the following
+suggestions in order to keep the overall quality and maintainability of the 
+code high. 
+
+## Versioning and releases
+
+MALA has a versioning system. The version number is only updated when merging
+on `master`. This constitutes a release. Please note that not all changes
+to code constitute such a release and generally, merges will be directed
+to the `develop` branch
+(see [branching strategy](#branching-strategy)). The version number has
+the form `MAJOR.MINOR.FIX`:
+
+* `MAJOR`: Big changes to the code, that fundamentally change the way it 
+  functions or wrap up a longer development.
+* `MINOR`: New features have been added to the code.
+* `FIX`: A bug in a feature has been fixed. 
 
-Some introductory text ...
 
 ## Branching strategy
 
-In general, contributors should develop on branches based off of `master` and merge requests should be to `master`.
+In general, contributors should develop on branches based off of `develop` and
+merge requests should be to `develop`. Please choose a descriptive branch name,
+ideally incorporating some identifying information (such as your initials)
+or the starting date of your developments. Merges from `develop` to `master`
+will only be done after prior consultation with the core development team.
+
+## Developing code
+
+* Regularly check your code for PEP8 compliance
+* Make sure all your classes, functions etc. are documented properly, 
+  follow the 
+  [NumPy style](https://numpydoc.readthedocs.io/en/latest/format.html#docstring-standard) 
+  for docstrings
+* Keep your code object-oriented, modular, and easily reusable
+* Add tests and examples for sizable new features
 
 ## Submitting a merge request
 
-- ensure that your documentation of classes, functions etc. follow the [NumPy style](https://numpydoc.readthedocs.io/en/latest/format.html#docstring-standard) for docstrings
-- rebase your branch onto `master` before submitting a merge request
+* Ensure that you followed the guidelines for [developing code](#developing-code)
+* Rebase your branch onto `develop` before submitting a merge request
+* Only assign yourself to a merge request when time does not permit an 
+  external check
 
 ## Issues
 
-...
+* Use issues to document potential enhancements, bugs and such
+* Please tag your issues, and consider setting up a deadline 
+
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -20,7 +20,7 @@
 
 # -- Project information -----------------------------------------------------
 
-project = 'FESL'
+project = 'MALA'
 copyright = '2021, HZDR'
 author = 'HZDR'
 

diff --git a/docs/source/features/background.rst b/docs/source/features/background.rst
@@ -0,0 +1,60 @@
+Background
+===========
+
+Workflow
+*********
+
+The goal of MALA is to build surrogate models for electronic structure theory.
+These surrogate models are based on neural networks. After training such
+a model, it allows the fast evaluation of the total energy and atomic forces.
+MALA is built around Density Functional Theory, but can in
+principle be used with all electronic structure methods that calculate the
+total energy and atomic forces given atomic positions as input.
+Building these surrogate models requires
+:doc:`preprocessing <preprocessing>` of the data, training of a
+:doc:`neural network <neuralnetworks>` and
+:doc:`postprocessing <postprocessing>` of the results.
+MALA is designed for the investigation of systems at non-zero temperatures and
+operates in a "per-grid-point" manner, meaning that every grid point of a
+simulation cell is passed through the network individually.
+
+Density Functional Theory
+*************************
+
+Density Functional Theory (DFT) is one of the most popular electronic structure
+calculation methods due to its combination of reasonable accuracy and
+computational cost.
+In DFT, the central quantity is the electronic density of a given system.
+Within the Kohn-Sham framework, this density is given by
+
+.. math::
+
+    n(\boldsymbol{r}) = \sum_j f^\beta(\epsilon_j)\,
+    |\phi_j(\boldsymbol{r})|^2 \; .
+
+Here, :math:`\phi_j(\boldsymbol{r})` denote the Kohn-Sham wave functions,
+which are given by the Kohn-Sham equations
+
+.. math::
+
+   \left[-\frac{1}{2}\nabla^2 + v_\mathrm{{\scriptscriptstyle S}}(\boldsymbol{r};
+   \underline{\boldsymbol{R}})\right] \phi_j(\boldsymbol{r};
+   \underline{\boldsymbol{R}}) = \epsilon_j \phi_j(\boldsymbol{r};
+   \underline{\boldsymbol{R}}) \; ,
+
+which give a system of non-interacting particles restricted to reproduce
+the density of the interacting system. The total energy is evaluated using
+
+.. math::
+
+    E_\mathrm{total}(\underline{\boldsymbol{r}}) =
+    T_\mathrm{{\scriptscriptstyle S}}
+    [n](\underline{\boldsymbol{r}}) -
+    S_\mathrm{{\scriptscriptstyle S}}
+    [n](\underline{\boldsymbol{r}})/\beta
+    + E_\mathrm{{\scriptscriptstyle H}}
+    [n](\underline{\boldsymbol{r}}) +
+    E_\mathrm{{\scriptscriptstyle XC}}[n](\underline{\boldsymbol{r}})
+    + E^{ei}[n](\underline{\boldsymbol{r}})+ E^{ii} + \mu N_e \; .
+
+Forces and other quantities of interest can be derived from the total energy.
diff --git a/docs/source/features/neuralnetworks.rst b/docs/source/features/neuralnetworks.rst
@@ -0,0 +1,6 @@
+Neural Networks
+=================
+
+Neural networks are powerful machine learning tools in principle capable of
+approximating any function. In MALA, neural networks are built using PyTorch.
+Hyperparameter optimization can be done using optuna and custom routines.
diff --git a/docs/source/features/postprocessing.rst b/docs/source/features/postprocessing.rst
@@ -0,0 +1,27 @@
+Postprocessing
+==============
+
+MALA provides routines to calculate quantities of interest from the physical
+data such as the LDOS, DOS and electronic density.
+
+LDOS
+*****
+
+The LDOS can be used to calculate the number of electrons and band energy
+(both via the DOS) and the total energy (via the DOS and the electronic
+density) of a system. In order to do that, the LDOS is usually integrated
+either on the spatial or the energy grid, yielding the DOS or density
+respectively.
+
+DOS
+***
+
+The DOS can be used to calculate the number of electrons and band energy
+as well as the entropy contribution to the total energy.
+
+
+Density
+*******
+
+The density can be used to calculate the number of electrons and
+density-based contributions to the total energy.
diff --git a/docs/source/features/preprocessing.rst b/docs/source/features/preprocessing.rst
@@ -0,0 +1,21 @@
+Preprocessing
+==============
+
+Descriptors
+***********
+
+The input data for MALA are, in theory, the atomic positions. As MALA
+operates in a "per-grid-point" manner, information from the atomic positions has
+to be present on the entire grid of the simulation cell. This is done by
+calculating descriptors on the grid. Currently, only SNAP descriptors are
+supported. MALA uses LAMMPS to calculate these SNAP descriptors.
+
+Targets
+***********
+
+MALA is optimized for the usage of the LDOS (local density of states) as
+target quantity. The LDOS gives the DOS (density of states) at each grid point,
+and thus gives information on the energy-grid as well as the 3D grid.
+The LDOS can be used to :doc:`efficiently calculate quantities of interest
+<postprocessing>`. MALA provides parsing routines to read the LDOS from
+DFT calculations.
diff --git a/docs/source/features/template.rst b/docs/source/features/template.rst