Merge branch '210302_LF_CleanExamples' into 'master'

210302 lf clean examples See merge request multiscale-wdm/surrogate-models/fesl/fesl!40
mala-project · Mar 2, 2021 · f206204 · f206204
2 parents a8a359a + 7a1fee1
commit f206204
Show file tree

Hide file tree

Showing 22 changed files with 817 additions and 953 deletions.
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
@@ -44,11 +44,6 @@ setup-fesl:
     - *data_setup
   script:
     - python examples/ex00_verify_installation.py
-  only:
-    - master
-    - conda-CI
-    - pipelinetest
-    - pipeline_fix_test-basic-functions
 
 test-basic-functions:
   stage: test
@@ -61,11 +56,6 @@ test-basic-functions:
     - cd test
     - python fesl_tests.py
   needs: [setup-fesl]
-  only:
-    - master
-    - conda-CI
-    - pipelinetest
-    - pipeline_fix_test-basic-functions
 
 test-workflow:
   stage: test
@@ -77,10 +67,6 @@ test-workflow:
     - python ex99_verify_all_examples.py
     - cd ..
   needs: [setup-fesl]
-  only:
-    - master
-    - conda-CI
-    - pipeline_fix_test-basic-functions
 
 #### documentation
 
@@ -113,14 +99,6 @@ test-docstrings:
   script:
     - pydocstyle --convention=numpy fesl
   needs: [setup-docs]
-  only:
-    - master
-    - conda-CI
-    - documentation
-    - fix_docstrings
-    - 210222_LF_FixDocstrings
-    - /^fix_docstrings_in_.*$/
-    - pipeline_fix_test-basic-functions
 
 pages:
   stage: deploy
@@ -147,11 +125,3 @@ pages:
     - public
   when:
     always
-  only:
-  - master
-  - conda-CI
-  - documentation
-  - fix_docstrings
-  - 210222_LF_FixDocstrings
-  - /^fix_docstrings_in_.*$/
-  - pipeline_fix_test-basic-functions
diff --git a/examples/ex01_run_singleshot.py b/examples/ex01_run_singleshot.py
@@ -4,8 +4,9 @@
 data_path = get_data_repo_path()+"Al256_reduced/"
 
 """
-ex01_run_singleshot.py: Shows how a neural network can be trained on material data using this framework.
-It uses preprocessed data, that is read in from *.npy files.
+ex01_run_singleshot.py: Shows how a neural network can be trained on material 
+data using this framework. It uses preprocessed data, that is read in 
+from *.npy files.
 """
 
 printout("Welcome to FESL.")
@@ -18,48 +19,61 @@ def run_example01(desired_loss_improvement_factor=1):
 
     ####################
     # PARAMETERS
-    # All parameters are handled from a central parameters class that contains subclasses.
+    # All parameters are handled from a central parameters class that
+    # contains subclasses.
     ####################
+
     test_parameters = fesl.Parameters()
+    # Currently, the splitting into training, validation and test set is
+    # done on a "by snapshot" basis. Specify how this is
+    # done by providing a list containing entries of the form
+    # "tr", "va" and "te".
     test_parameters.data.data_splitting_type = "by_snapshot"
     test_parameters.data.data_splitting_snapshots = ["tr", "va", "te"]
+
+    # Specify the data scaling.
     test_parameters.data.input_rescaling_type = "feature-wise-standard"
     test_parameters.data.output_rescaling_type = "normal"
-    test_parameters.descriptors.twojmax = 11
-    test_parameters.targets.ldos_gridsize = 10
+
+    # Specify the used activation function.
     test_parameters.network.layer_activations = ["ReLU"]
+
+    # Specify the training parameters.
     test_parameters.running.max_number_epochs = 20
     test_parameters.running.mini_batch_size = 40
     test_parameters.running.learning_rate = 0.00001
     test_parameters.running.trainingtype = "Adam"
-    test_parameters.running.use_gpu = False
-    test_parameters.running.use_horovod = False
-    test_parameters.running.use_compression= False
 
     ####################
     # DATA
-    # Read data into RAM.
-    # We have to specify the directories we want to read the snapshots from.
-    # The Handlerinterface will also return input and output scaler objects. These are used internally to scale
-    # the data. The objects can be used after successful training for inference or plotting.
+    # Add and prepare snapshots for training.
     ####################
 
     data_handler = fesl.DataHandler(test_parameters)
 
     # Add the snapshots we want to use to the list.
-    data_handler.add_snapshot("Al_debug_2k_nr0.in.npy", data_path, "Al_debug_2k_nr0.out.npy", data_path, output_units="1/Ry")
-    data_handler.add_snapshot("Al_debug_2k_nr1.in.npy", data_path, "Al_debug_2k_nr1.out.npy", data_path, output_units="1/Ry")
-    data_handler.add_snapshot("Al_debug_2k_nr2.in.npy", data_path, "Al_debug_2k_nr2.out.npy", data_path, output_units="1/Ry")
-
+    data_handler.add_snapshot("Al_debug_2k_nr0.in.npy", data_path,
+                              "Al_debug_2k_nr0.out.npy", data_path,
+                              output_units="1/Ry")
+    data_handler.add_snapshot("Al_debug_2k_nr1.in.npy", data_path,
+                              "Al_debug_2k_nr1.out.npy", data_path,
+                              output_units="1/Ry")
+    data_handler.add_snapshot("Al_debug_2k_nr2.in.npy", data_path,
+                              "Al_debug_2k_nr2.out.npy", data_path,
+                              output_units="1/Ry")
     data_handler.prepare_data()
     printout("Read data: DONE.")
 
     ####################
     # NETWORK SETUP
     # Set up the network and trainer we want to use.
+    # The layer sizes can be specified before reading data,
+    # but it is safer this way.
     ####################
 
-    test_parameters.network.layer_sizes = [data_handler.get_input_dimension(), 100, data_handler.get_output_dimension()]
+    test_parameters.network.layer_sizes = [data_handler.get_input_dimension(),
+                                           100,
+                                           data_handler.get_output_dimension()]
 
     # Setup network and trainer.
     test_network = fesl.Network(test_parameters)
@@ -76,10 +90,15 @@ def run_example01(desired_loss_improvement_factor=1):
     printout("Training: DONE.")
 
     ####################
+    # RESULTS.
+    # Print the used parameters and check whether the loss decreased enough.
+    ####################
+
     printout("Parameters used for this experiment:")
     test_parameters.show()
 
-    if desired_loss_improvement_factor*test_trainer.initial_test_loss < test_trainer.final_test_loss:
+    if desired_loss_improvement_factor*test_trainer.initial_test_loss\
+            < test_trainer.final_test_loss:
         return False
     else:
         return True
@@ -89,5 +108,7 @@ def run_example01(desired_loss_improvement_factor=1):
     if run_example01():
         printout("Successfully ran ex01_run_singleshot.")
     else:
-        raise Exception("Ran ex01_run_singleshot but something was off. If you haven't changed any parameters in "
-                        "the example, there might be a problem with your installation.")
+        raise Exception("Ran ex01_run_singleshot but something was off."
+                        " If you haven't changed any parameters in "
+                        "the example, there might be a problem with your"
+                        " installation.")
diff --git a/examples/ex02_preprocess_data.py b/examples/ex02_preprocess_data.py
@@ -0,0 +1,91 @@
+import fesl
+from fesl import printout
+from data_repo_path import get_data_repo_path
+import numpy as np
+data_path = get_data_repo_path()+"Al36/"
+
+"""
+ex02_preprocess_data.py: Shows how this framework can be used to preprocess
+data. Preprocessing here means converting raw DFT calculation output into 
+numpy arrays of the correct size. For the input data, this means descriptor
+calculation.
+
+Further preprocessing steps (scaling, unit conversion) are done later.
+"""
+
+
+def run_example02():
+
+    ####################
+    # PARAMETERS
+    # All parameters are handled from a central parameters class that
+    # contains subclasses.
+    ####################
+
+    test_parameters = fesl.Parameters()
+
+    # Specify input data options, i.e. which descriptors are calculated
+    # with which parameters. These are the standard parameters for
+    # the calculation of SNAP descriptors.
+    test_parameters.descriptors.descriptor_type = "SNAP"
+    test_parameters.descriptors.twojmax = 10
+    test_parameters.descriptors.rcutfac = 4.67637
+    test_parameters.data.descriptors_contain_xyz = True
+
+    # Specify output data options, i.e. how the LDOS is parsed.
+    # The Al system used as an example here actually has 250 energy levels.
+    # But for the convenience of the user, only 10 energy levels will be
+    # used for this example.
+    test_parameters.targets.target_type = "LDOS"
+    test_parameters.targets.ldos_gridsize = 10
+    test_parameters.targets.ldos_gridspacing_ev = 0.1
+    test_parameters.targets.ldos_gridoffset_ev = -10
+
+    ####################
+    # DATA
+    # Create a DataConverter, and add snapshots to it.
+    ####################
+
+    data_converter = fesl.DataConverter(test_parameters)
+
+    # Take care to choose the "add_snapshot" function correct for
+    # the type of data you want to preprocess.
+    data_converter.add_snapshot_qeout_cube("Al.pw.scf.out", data_path,
+                                           "cubes/tmp.pp*Al_ldos.cube",
+                                           data_path, output_units="1/Ry")
+
+    # Convert all the snapshots and save them in the current directory.
+    # data_converter.convert_snapshots("./", naming_scheme="Al_snapshot*")
+
+    ####################
+    # RESULTS.
+    # Print the used parameters and check whether the preprocessed data
+    # has the desired dimensions.
+    ####################
+
+    printout("Parameters used for this experiment:")
+    test_parameters.show()
+
+    input_data = np.load("Al_snapshot0.in.npy")
+    input_data_shape = np.shape(input_data)
+    if input_data_shape[0] != 108 or input_data_shape[1] != 108 or \
+            input_data_shape[2] != 100 or input_data_shape[3] != 94:
+        return False
+
+    output_data = np.load("Al_snapshot0.out.npy")
+    output_data_shape = np.shape(output_data)
+    if output_data_shape[0] != 108 or output_data_shape[1] != 108 or \
+            output_data_shape[2] != 100 or output_data_shape[3] != 10:
+        return False
+
+    return True
+
+
+if __name__ == "__main__":
+    if run_example02():
+        printout("Successfully ran ex02_preprocess_data.")
+    else:
+        raise Exception("Ran ex02_preprocess_data but something was off."
+                        " If you haven't changed any parameters in "
+                        "the example, there might be a problem with your"
+                        " installation.")
diff --git a/examples/ex03_postprocess_data.py b/examples/ex03_postprocess_data.py
@@ -0,0 +1,110 @@
+import fesl
+from fesl import printout
+import numpy as np
+from data_repo_path import get_data_repo_path
+data_path = get_data_repo_path()+"Al36/"
+
+
+"""
+ex03_postprocess_data.py: Shows how this framework can be used to
+postprocess data. Usually, this framework outputs LDOS data, therefore,
+post processing of LDOS data will be shown in the following. 
+Set do_total_energy to False, if you don't have the QuantumEspresso
+Python module installed.
+"""
+
+
+def run_example03(do_total_energy=True, accuracy_electrons = 1e-11,
+                  accuracy_total_energy=50):
+
+    ####################
+    # PARAMETERS
+    # All parameters are handled from a central parameters class that
+    # contains subclasses.
+    ####################
+    test_parameters = fesl.Parameters()
+
+    # Specify the correct LDOS parameters.
+    test_parameters.targets.target_type = "LDOS"
+    test_parameters.targets.ldos_gridsize = 250
+    test_parameters.targets.ldos_gridspacing_ev = 0.1
+    test_parameters.targets.ldos_gridoffset_ev = -10
+
+    ####################
+    # TARGETS
+    # Create a target calculator to postprocess data.
+    # Use this calculator to perform various operations.
+    ####################
+
+    ldos = fesl.TargetInterface(test_parameters)
+
+    # Read additional information about the calculation.
+    # By doing this, the calculator is able to know e.g. the temperature
+    # at which the calculation took place or the lattice constant used.
+    ldos.read_additional_calculation_data("qe.out",
+                                          data_path+"Al.pw.scf.out")
+
+    # Read in LDOS data. For actual workflows, this part will come
+    # from a network.
+    ldos_data = np.load(data_path+"Al_ldos.npy")
+
+    # Get quantities of interest.
+    # For better values in the post processing, it is recommended to
+    # calculate the "self-consistent Fermi energy", i.e. the Fermi energy
+    # at which the (L)DOS reproduces the exact number of electrons.
+    # This Fermi energy usually differs from the one outputted by the
+    # QuantumEspresso calculation, due to numerical reasons. The difference
+    # is usually very small.
+    self_consistent_fermi_energy = ldos.\
+        get_self_consistent_fermi_energy_ev(ldos_data)
+    number_of_electrons = ldos.\
+        get_number_of_electrons(ldos_data, fermi_energy_eV=
+                                self_consistent_fermi_energy)
+    band_energy = ldos.get_band_energy(ldos_data,
+                                       fermi_energy_eV=
+                                       self_consistent_fermi_energy)
+    if do_total_energy:
+        # To perform a total energy calculation one also needs to provide
+        # a pseudopotential(path).
+        ldos.set_pseudopotential_path(data_path)
+        total_energy = ldos.get_total_energy(ldos_data,
+                                             fermi_energy_eV=
+                                             self_consistent_fermi_energy)
+
+    ####################
+    # RESULTS.
+    # Print the used parameters and check whether LDOS based results
+    # are consistent with the actual DFT results.
+    ####################
+
+    printout("Parameters used for this experiment:")
+    test_parameters.show()
+
+    print("Number of electrons:", number_of_electrons)
+    print("Band energy:", band_energy)
+    if do_total_energy:
+        print("Total energy:", total_energy)
+
+    if np.abs(number_of_electrons - ldos.number_of_electrons) > \
+            accuracy_electrons:
+        return False
+
+    # FIXME: Add  as soon as band_energy_dft_calculation is fixed.
+    # if np.abs(number_of_electrons - ldos.number_of_electrons) > accuracy:
+    #     return True
+
+    if do_total_energy:
+        if np.abs(total_energy - ldos.total_energy_dft_calculation) > \
+                accuracy_total_energy:
+            return False
+    return True
+
+
+if __name__ == "__main__":
+    if run_example03():
+        printout("Successfully ran ex03_postprocess_data.")
+    else:
+        raise Exception("Ran ex03_postprocess_data but something was off."
+                        " If you haven't changed any parameters in "
+                        "the example, there might be a problem with your"
+                        " installation.")