add functionality to extract equations from aflow --proto

openkim · Dec 9, 2024 · d001e5c · d001e5c
1 parent 32e74cb
commit d001e5c
Show file tree

Hide file tree

Showing 5 changed files with 106 additions and 23 deletions.
diff --git a/kim_tools/aflow_util/core.py b/kim_tools/aflow_util/core.py
@@ -12,10 +12,12 @@
 from curses.ascii import isalpha, isupper, isdigit
 from typing import Dict, List, Tuple, Union, Optional
 from tempfile import NamedTemporaryFile
+from sympy import parse_expr,matrix2numpy,linear_eq_to_matrix,Symbol
 
 __author__ = ["ilia Nikiforov", "Ellad Tadmor"]
 __all__ = [
     "split_parameter_array",
+    "internal_parameter_sort_key",
     "get_stoich_reduced_list_from_prototype",
     "get_species_list_from_string",
     "read_shortnames",
@@ -83,6 +85,16 @@ def split_parameter_array(parameter_names: List[str], list_to_split: Optional[Li
 
     return cell_part, internal_part
 
+def internal_parameter_sort_key(parameter_name: Union[Symbol,str] ) -> int:
+    """
+    Sorting key for internal free parameters. Sort by number first, then letter
+    """
+    parameter_name_str = str(parameter_name)
+    axis = parameter_name_str[0]
+    assert axis == 'x' or axis == 'y' or axis == 'z', 'Parameter name must start with x, y, or z'
+    number = int(parameter_name_str[1:])
+    return 1000*number + ord(axis)
+
 def get_stoich_reduced_list_from_prototype(prototype_label: str) -> List[int]:
     """
     Get numerical list of stoichiometry from prototype label, i.e. "AB3\_...." -> [1,3]
@@ -316,7 +328,7 @@ def __init__(self, aflow_executable:str="aflow", aflow_work_dir:str="",np:int=1)
         else:
             self.aflow_work_dir = aflow_work_dir
 
-    def aflow_command(self, cmd: List[str]) -> str:
+    def aflow_command(self, cmd: Union[str,List[str]]) -> str:
         """
         Run AFLOW executable with specified arguments and return the output, possibly multiple times piping outputs to each other     
 
@@ -330,6 +342,9 @@ def aflow_command(self, cmd: List[str]) -> str:
         Returns:
             Output of the AFLOW command
         """
+        if not isinstance(cmd,list):
+            cmd = [cmd]
+
         cmd_list = [self.aflow_executable + " --np=" + str(self.np) + " " + cmd_inst
             for cmd_inst in cmd]
         cmd_str = " | ".join(cmd_list)                
@@ -665,7 +680,7 @@ def build_atoms_from_prototype(
 
         return atoms
 
-    def get_equations_from_prototype(self, prototype_label: str, parameter_values: List[float], species: Optional[List[str]] = None) -> \
+    def get_equations_from_prototype(self, prototype_label: str, parameter_values: List[float]) -> \
         Tuple[List[Dict],List[str]]:
         """
         Get the symbolic equations for the fractional positions in the unit cell of an AFLOW prototype
@@ -675,16 +690,41 @@ def get_equations_from_prototype(self, prototype_label: str, parameter_values: L
                 An AFLOW prototype label, without an enumeration suffix, without specified atomic species
             parameter_values: 
                 The free parameters of the AFLOW prototype designation
-            species:
-                Stoichiometric species, e.g. ``['Mo','S']`` corresponding to A and B respectively for prototype label AB2_hP6_194_c_f indicating molybdenite.
-                If this is omitted, the equations will be returned with symbolic species (i.e. A, B, etc.)
             
         Returns:
             Two lists. The second list is a list of the names internal free parameters of the crystal sorted according to AFLOW convention (e.g. ['x1','x2','y2'])
-            The first list contains one dictionary for each atomic position. It has keys 'equation' and 'species'. The 'equation' is a 3-by-n numpy matrix, where
-            n is the number of internal free parameters. This way, multiplying this matrix by the vector of internal free parameters results in a column vector
-            of fractional coordinates for that atom.
-    
+            The first list contains one dictionary for each atomic position. It has keys 'equations' and 'species'. The 'equations' is a 3-by-(n+1) numpy matrix, where
+            n is the number of internal free parameters. This way, multiplying this matrix by the vector of internal free parameters plus '1' (for the constant terms)
+            results in a column vector of fractional coordinates for that atom. 'species' is the virtual species (e.g. A,B etc)
         """
 
-
+        equation_poscar = self.aflow_command(f'--proto={prototype_label} --params={",".join([str(param) for param in parameter_values])} --add_equations') # equations_only is buggy
+
+        # First, parse the lines into equations and construct set of free parameters. While at it, populate the symbols for return
+        list_of_systems_of_equations = [] # list of lists
+        free_params_set = set() # data type: sympy.Symbol
+        return_list = []
+        seen_this_many_lines_starting_with_direct = 0
+        for line in equation_poscar.splitlines():
+            if seen_this_many_lines_starting_with_direct < 2:
+                if line.startswith('Direct('):
+                    seen_this_many_lines_starting_with_direct += 1
+                continue
+            line_split = line.split()
+            return_list.append({'species':line_split[3]})
+            system_of_equations = []
+            for expression_string in line_split[:3]:
+                coordinate_expr = parse_expr(expression_string)
+                free_params_set.update(coordinate_expr.free_symbols)
+                system_of_equations.append(coordinate_expr)
+            list_of_systems_of_equations.append(system_of_equations)
+
+        free_params_list = list(free_params_set) # data type: sympy.Symbol
+        free_params_list.sort(key = internal_parameter_sort_key)
+
+        # loop a second time (necessary because we needed to have constructed the set of free parameters first)
+        for return_dict,system_of_equations in zip(return_list,list_of_systems_of_equations):
+            a,b = linear_eq_to_matrix(system_of_equations,free_params_list)
+            return_dict['equations'] = np.concatenate((matrix2numpy(a,dtype=np.float64),matrix2numpy(-b,dtype=np.float64)),axis=1)
+
+        return return_list,[str(free_param) for free_param in free_params_list]
diff --git a/setup.py b/setup.py
@@ -28,7 +28,7 @@
     long_description_content_type="text/markdown",
     packages=setuptools.find_packages(),
     license="CDDL",
-    install_requires=["ase >= 3.19.0b1", "kim-property >= 2.6.2", "kim-edn >= 1.4.1", "spglib >= 2.1.0", "kim-query >= 3.0.0"],
+    install_requires=["ase >= 3.19.0b1", "kim-property >= 2.6.2", "kim-edn >= 1.4.1", "spglib >= 2.1.0", "kim-query >= 3.0.0", "sympy >= 1.13.2"],
     classifiers=[
         "Development Status :: 4 - Beta"
         "License :: OSI Approved :: Common Development and Distribution License 1.0 (CDDL-1.0)",

diff --git a/tests/test_AFLOW.py b/tests/test_AFLOW.py
@@ -1,3 +1,5 @@
+#!/usr/bin/python
+
 from kim_tools import AFLOW, split_parameter_array
 import numpy as np
 
@@ -7,31 +9,65 @@
 {"species": ["U"], "prototype_label": "A_oC4_63_c", "parameter_names": ["a", "b/a", "c/a", "y1"], "parameter_values": [3.38, 1.7635799, 1.6884024, 0.14768057]},
 {"species": ["Mo", "Ni"], "prototype_label": "AB4_tI10_87_a_h", "parameter_names": ["a", "c/a", "x2", "y2"], "parameter_values": [5.6931, 0.62094465, 0.40159241, 0.80168077]},
 {"species": ["O", "Ti"], "prototype_label": "A2B_tP6_136_f_a", "parameter_names": ["a", "c/a", "x2"], "parameter_values": [4.6726, 0.64700595, 0.30516921]},
-{"species": ["Al", "Pd"], "prototype_label": "AB_hR26_148_a2f_b2f", "parameter_names": ["a", "c/a", "x3", "y3", "z3", "x4", "y4", "z4", "x5", "y5", "z5", "x6", "y6", "z6"], "parameter_values": [15.732393, 0.33556832, 0.5574733, 0.84810628, 0.59879838, 0.25162269, 0.65374741, 0.096413033, 0.056906892, 0.34500705, 0.098395144, 0.75098957, 0.15501208, 0.59773116]},
+#{"species": ["Al", "Li"], "prototype_label": "A2B3_hR5_166_c_ac", "parameter_names": ["a", "c/a", "x2", "x3"], "parameter_values": [4.4631793, 3.1571619, 0.19703494, 0.5976076]},
+#{"species": ["Al", "C"], "prototype_label": "A4B3_hR7_166_2c_ac", "parameter_names": ["a", "c/a", "x2", "x3", "x4"], "parameter_values": [3.3544538, 7.492051, 0.70650219, 0.87016163, 0.78319433]},
+#{"species": ["Al", "Au"], "prototype_label": "A3B8_hR44_167_bce_2c2f", "parameter_names": ["a", "c/a", "x2", "x3", "x4", "x5", "x6", "y6", "z6", "x7", "y7", "z7"], "parameter_values": [7.8105146, 5.4347116, 0.15613556, 0.21753425, 0.93687697, 0.56692249, 0.64901599, 0.0043751172, 0.29495592, 0.61541438, 0.35227113, 0.87563146]},
+#{"species": ["Al", "Pd"], "prototype_label": "AB_hR26_148_a2f_b2f", "parameter_names": ["a", "c/a", "x3", "y3", "z3", "x4", "y4", "z4", "x5", "y5", "z5", "x6", "y6", "z6"], "parameter_values": [15.732393, 0.33556832, 0.5574733, 0.84810628, 0.59879838, 0.25162269, 0.65374741, 0.096413033, 0.056906892, 0.34500705, 0.098395144, 0.75098957, 0.15501208, 0.59773116]},
+{"species": ["Mo", "S"], "prototype_label": "AB2_hR3_166_a_c", "parameter_names": ["a", "c/a", "x2"], "parameter_values": [3.2047217, 5.9412636, 0.24969054]},
 {"species": ["O", "Si"], "prototype_label": "A2B_hP9_154_c_a", "parameter_names": ["a", "c/a", "x1", "x2", "y2", "z2"], "parameter_values": [5.0682, 1.0946293, 0.48489843, 0.41536476, 0.23922009, 0.80786585]},
 {"species": ["Fe", "P"], "prototype_label": "A2B_hP9_189_fg_ad", "parameter_names": ["a", "c/a", "x3", "x4"], "parameter_values": [6.4943, 0.52673883, 0.41418679, 0.73946286]},
 {"species": ["As", "Ga"], "prototype_label": "AB_cP16_205_c_c", "parameter_names": ["a", "x1", "x2"], "parameter_values": [7.0281, 0.14318501, 0.34443472]},
 {"species": ["Ce", "O"], "prototype_label": "AB2_cF12_225_a_c", "parameter_names": ["a"], "parameter_values": [5.4908]},
 {"species": ["Al"], "prototype_label": "A_cF4_225_a", "parameter_names": ["a"], "parameter_values": [4.039]},
 ]
 
-def test_aflow():
+def test_get_equations_from_prototype():
     aflow = AFLOW()
     for case in TEST_CASES:
         parameter_names = case.pop("parameter_names")
         _, internal_parameter_names_ref = split_parameter_array(parameter_names)
         _, internal_parameter_values = split_parameter_array(parameter_names,case["parameter_values"])
-        internal_parameter_values = np.asarray(internal_parameter_values)
+        internal_parameter_values_and_one = np.asarray(internal_parameter_values+[1])
         atoms = aflow.build_atoms_from_prototype(**case)
+        species = case.pop("species")
         equations, internal_parameter_names = aflow.get_equations_from_prototype(**case)
-        assert internal_parameter_names == internal_parameter_names_ref, "get_equations_from_prototype got incorrect internal parameter names"
+
+        assert internal_parameter_names == internal_parameter_names_ref, \
+            f"get_equations_from_prototype got incorrect internal parameter names, got {internal_parameter_names} expected {internal_parameter_names_ref}"
         assert len(equations) == len(atoms), "get_equations_from_prototype got an incorrect number of equations"
-        for equation,atom in zip(equations,atoms):
-            assert equation["species"] == atom.symbol, "get_equations_from_prototype got incorrect species"
-            position_differences = np.matmul(equation["equation"],internal_parameter_values) - atom.scaled_position
-            for position_difference in position_differences:
-                assert (np.allclose(position_difference,-1.0) or np.allclose(position_difference,0.0) or np.allclose(position_difference,1.0)), \
-                    "get_equations_from_prototype got incorrect position"        
+
+        virtual_to_real_species_map = {}
+        for i,symbol in enumerate(species):
+            virtual_to_real_species_map[symbol]=chr(65+i)
+
+        diagnostics = f'Problem occurred in prototype {case["prototype_label"]}\nReference fractional positions and species:\n'
+        for atom in atoms:
+            for position in atom.scaled_position:
+                diagnostics += f'{position:8.4f}'
+            diagnostics += f'    {virtual_to_real_species_map[atom.symbol]}\n'
+        diagnostics += '\nComputed fractional positions and species:\n'
+        for equation in equations:
+            scaled_position = np.matmul(equation["equations"],internal_parameter_values_and_one)
+            for position in scaled_position:
+                diagnostics += f'{position:8.4f}'
+            diagnostics += f'    {equation["species"]}\n'
+
+        equation_matched = [False]*len(equations)
+        for atom in atoms:
+            found_match = False
+            for i,equation in enumerate(equations):
+                position_differences = np.matmul(equation["equations"],internal_parameter_values_and_one) - atom.scaled_position
+                if np.allclose(position_differences,np.rint(position_differences),atol=1e-5):
+                    assert equation["species"] == virtual_to_real_species_map[atom.symbol], \
+                        f"get_equations_from_prototype got incorrect species, got {equation['species']} expected {virtual_to_real_species_map[atom.symbol]}\n\n{diagnostics}"
+                    assert not found_match, f"get_equations_from_prototype gave equations that matched the same atom twice!\n\n{diagnostics}"
+                    assert not equation_matched[i], f"get_equations_from_prototype gave an equations that matched two different atoms!\n\n{diagnostics}"
+                    found_match = True
+                    equation_matched[i] = True
+            assert found_match, \
+                f"get_equations_from_prototype did not produce any equations that match position {atom.scaled_position}.\n\n{diagnostics}"
+
+        print(f'Successfully checked get_equations_from_prototype for label {case["prototype_label"]}')            
 
 if __name__ == '__main__':
-    test_aflow()
+    test_get_equations_from_prototype()
diff --git a/tests/test_CrystalGenomeTest.py b/tests/test_CrystalGenomeTest.py
@@ -34,4 +34,7 @@ def test_cg():
     list_of_crystal_descriptions = query_crystal_genome_structures("LJ_Shifted_Bernardes_1958LowCutoff_Ar__MO_720819638419_004",["Ar"],"A_hP2_194_c")
     test(**list_of_crystal_descriptions[0])    
     assert len(test.property_instances) == 2
-    test.write_property_instances_to_file()
+    test.write_property_instances_to_file()
+
+if __name__ == '__main__':
+    test_cg()
diff --git a/tests/test_KIMTest.py b/tests/test_KIMTest.py
@@ -36,3 +36,7 @@ def test_kimtest():
 
     assert len(test.property_instances) == 6
     test.write_property_instances_to_file()
+
+
+if __name__ == '__main__':
+    test_kimtest()