Skip to content

Commit

Permalink
add functionality to extract equations from aflow --proto
Browse files Browse the repository at this point in the history
  • Loading branch information
ilia-nikiforov-umn committed Dec 9, 2024
1 parent 32e74cb commit d001e5c
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 23 deletions.
60 changes: 50 additions & 10 deletions kim_tools/aflow_util/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@
from curses.ascii import isalpha, isupper, isdigit
from typing import Dict, List, Tuple, Union, Optional
from tempfile import NamedTemporaryFile
from sympy import parse_expr,matrix2numpy,linear_eq_to_matrix,Symbol

__author__ = ["ilia Nikiforov", "Ellad Tadmor"]
__all__ = [
"split_parameter_array",
"internal_parameter_sort_key",
"get_stoich_reduced_list_from_prototype",
"get_species_list_from_string",
"read_shortnames",
Expand Down Expand Up @@ -83,6 +85,16 @@ def split_parameter_array(parameter_names: List[str], list_to_split: Optional[Li

return cell_part, internal_part

def internal_parameter_sort_key(parameter_name: Union[Symbol,str] ) -> int:
"""
Sorting key for internal free parameters. Sort by number first, then letter
"""
parameter_name_str = str(parameter_name)
axis = parameter_name_str[0]
assert axis == 'x' or axis == 'y' or axis == 'z', 'Parameter name must start with x, y, or z'
number = int(parameter_name_str[1:])
return 1000*number + ord(axis)

def get_stoich_reduced_list_from_prototype(prototype_label: str) -> List[int]:
"""
Get numerical list of stoichiometry from prototype label, i.e. "AB3\_...." -> [1,3]
Expand Down Expand Up @@ -316,7 +328,7 @@ def __init__(self, aflow_executable:str="aflow", aflow_work_dir:str="",np:int=1)
else:
self.aflow_work_dir = aflow_work_dir

def aflow_command(self, cmd: List[str]) -> str:
def aflow_command(self, cmd: Union[str,List[str]]) -> str:
"""
Run AFLOW executable with specified arguments and return the output, possibly multiple times piping outputs to each other
Expand All @@ -330,6 +342,9 @@ def aflow_command(self, cmd: List[str]) -> str:
Returns:
Output of the AFLOW command
"""
if not isinstance(cmd,list):
cmd = [cmd]

cmd_list = [self.aflow_executable + " --np=" + str(self.np) + " " + cmd_inst
for cmd_inst in cmd]
cmd_str = " | ".join(cmd_list)
Expand Down Expand Up @@ -665,7 +680,7 @@ def build_atoms_from_prototype(

return atoms

def get_equations_from_prototype(self, prototype_label: str, parameter_values: List[float], species: Optional[List[str]] = None) -> \
def get_equations_from_prototype(self, prototype_label: str, parameter_values: List[float]) -> \
Tuple[List[Dict],List[str]]:
"""
Get the symbolic equations for the fractional positions in the unit cell of an AFLOW prototype
Expand All @@ -675,16 +690,41 @@ def get_equations_from_prototype(self, prototype_label: str, parameter_values: L
An AFLOW prototype label, without an enumeration suffix, without specified atomic species
parameter_values:
The free parameters of the AFLOW prototype designation
species:
Stoichiometric species, e.g. ``['Mo','S']`` corresponding to A and B respectively for prototype label AB2_hP6_194_c_f indicating molybdenite.
If this is omitted, the equations will be returned with symbolic species (i.e. A, B, etc.)
Returns:
Two lists. The second list is a list of the names internal free parameters of the crystal sorted according to AFLOW convention (e.g. ['x1','x2','y2'])
The first list contains one dictionary for each atomic position. It has keys 'equation' and 'species'. The 'equation' is a 3-by-n numpy matrix, where
n is the number of internal free parameters. This way, multiplying this matrix by the vector of internal free parameters results in a column vector
of fractional coordinates for that atom.
The first list contains one dictionary for each atomic position. It has keys 'equations' and 'species'. The 'equations' is a 3-by-(n+1) numpy matrix, where
n is the number of internal free parameters. This way, multiplying this matrix by the vector of internal free parameters plus '1' (for the constant terms)
results in a column vector of fractional coordinates for that atom. 'species' is the virtual species (e.g. A,B etc)
"""


equation_poscar = self.aflow_command(f'--proto={prototype_label} --params={",".join([str(param) for param in parameter_values])} --add_equations') # equations_only is buggy

# First, parse the lines into equations and construct set of free parameters. While at it, populate the symbols for return
list_of_systems_of_equations = [] # list of lists
free_params_set = set() # data type: sympy.Symbol
return_list = []
seen_this_many_lines_starting_with_direct = 0
for line in equation_poscar.splitlines():
if seen_this_many_lines_starting_with_direct < 2:
if line.startswith('Direct('):
seen_this_many_lines_starting_with_direct += 1
continue
line_split = line.split()
return_list.append({'species':line_split[3]})
system_of_equations = []
for expression_string in line_split[:3]:
coordinate_expr = parse_expr(expression_string)
free_params_set.update(coordinate_expr.free_symbols)
system_of_equations.append(coordinate_expr)
list_of_systems_of_equations.append(system_of_equations)

free_params_list = list(free_params_set) # data type: sympy.Symbol
free_params_list.sort(key = internal_parameter_sort_key)

# loop a second time (necessary because we needed to have constructed the set of free parameters first)
for return_dict,system_of_equations in zip(return_list,list_of_systems_of_equations):
a,b = linear_eq_to_matrix(system_of_equations,free_params_list)
return_dict['equations'] = np.concatenate((matrix2numpy(a,dtype=np.float64),matrix2numpy(-b,dtype=np.float64)),axis=1)

return return_list,[str(free_param) for free_param in free_params_list]
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
long_description_content_type="text/markdown",
packages=setuptools.find_packages(),
license="CDDL",
install_requires=["ase >= 3.19.0b1", "kim-property >= 2.6.2", "kim-edn >= 1.4.1", "spglib >= 2.1.0", "kim-query >= 3.0.0"],
install_requires=["ase >= 3.19.0b1", "kim-property >= 2.6.2", "kim-edn >= 1.4.1", "spglib >= 2.1.0", "kim-query >= 3.0.0", "sympy >= 1.13.2"],
classifiers=[
"Development Status :: 4 - Beta"
"License :: OSI Approved :: Common Development and Distribution License 1.0 (CDDL-1.0)",
Expand Down
58 changes: 47 additions & 11 deletions tests/test_AFLOW.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#!/usr/bin/python

from kim_tools import AFLOW, split_parameter_array
import numpy as np

Expand All @@ -7,31 +9,65 @@
{"species": ["U"], "prototype_label": "A_oC4_63_c", "parameter_names": ["a", "b/a", "c/a", "y1"], "parameter_values": [3.38, 1.7635799, 1.6884024, 0.14768057]},
{"species": ["Mo", "Ni"], "prototype_label": "AB4_tI10_87_a_h", "parameter_names": ["a", "c/a", "x2", "y2"], "parameter_values": [5.6931, 0.62094465, 0.40159241, 0.80168077]},
{"species": ["O", "Ti"], "prototype_label": "A2B_tP6_136_f_a", "parameter_names": ["a", "c/a", "x2"], "parameter_values": [4.6726, 0.64700595, 0.30516921]},
{"species": ["Al", "Pd"], "prototype_label": "AB_hR26_148_a2f_b2f", "parameter_names": ["a", "c/a", "x3", "y3", "z3", "x4", "y4", "z4", "x5", "y5", "z5", "x6", "y6", "z6"], "parameter_values": [15.732393, 0.33556832, 0.5574733, 0.84810628, 0.59879838, 0.25162269, 0.65374741, 0.096413033, 0.056906892, 0.34500705, 0.098395144, 0.75098957, 0.15501208, 0.59773116]},
#{"species": ["Al", "Li"], "prototype_label": "A2B3_hR5_166_c_ac", "parameter_names": ["a", "c/a", "x2", "x3"], "parameter_values": [4.4631793, 3.1571619, 0.19703494, 0.5976076]},
#{"species": ["Al", "C"], "prototype_label": "A4B3_hR7_166_2c_ac", "parameter_names": ["a", "c/a", "x2", "x3", "x4"], "parameter_values": [3.3544538, 7.492051, 0.70650219, 0.87016163, 0.78319433]},
#{"species": ["Al", "Au"], "prototype_label": "A3B8_hR44_167_bce_2c2f", "parameter_names": ["a", "c/a", "x2", "x3", "x4", "x5", "x6", "y6", "z6", "x7", "y7", "z7"], "parameter_values": [7.8105146, 5.4347116, 0.15613556, 0.21753425, 0.93687697, 0.56692249, 0.64901599, 0.0043751172, 0.29495592, 0.61541438, 0.35227113, 0.87563146]},
#{"species": ["Al", "Pd"], "prototype_label": "AB_hR26_148_a2f_b2f", "parameter_names": ["a", "c/a", "x3", "y3", "z3", "x4", "y4", "z4", "x5", "y5", "z5", "x6", "y6", "z6"], "parameter_values": [15.732393, 0.33556832, 0.5574733, 0.84810628, 0.59879838, 0.25162269, 0.65374741, 0.096413033, 0.056906892, 0.34500705, 0.098395144, 0.75098957, 0.15501208, 0.59773116]},
{"species": ["Mo", "S"], "prototype_label": "AB2_hR3_166_a_c", "parameter_names": ["a", "c/a", "x2"], "parameter_values": [3.2047217, 5.9412636, 0.24969054]},
{"species": ["O", "Si"], "prototype_label": "A2B_hP9_154_c_a", "parameter_names": ["a", "c/a", "x1", "x2", "y2", "z2"], "parameter_values": [5.0682, 1.0946293, 0.48489843, 0.41536476, 0.23922009, 0.80786585]},
{"species": ["Fe", "P"], "prototype_label": "A2B_hP9_189_fg_ad", "parameter_names": ["a", "c/a", "x3", "x4"], "parameter_values": [6.4943, 0.52673883, 0.41418679, 0.73946286]},
{"species": ["As", "Ga"], "prototype_label": "AB_cP16_205_c_c", "parameter_names": ["a", "x1", "x2"], "parameter_values": [7.0281, 0.14318501, 0.34443472]},
{"species": ["Ce", "O"], "prototype_label": "AB2_cF12_225_a_c", "parameter_names": ["a"], "parameter_values": [5.4908]},
{"species": ["Al"], "prototype_label": "A_cF4_225_a", "parameter_names": ["a"], "parameter_values": [4.039]},
]

def test_aflow():
def test_get_equations_from_prototype():
aflow = AFLOW()
for case in TEST_CASES:
parameter_names = case.pop("parameter_names")
_, internal_parameter_names_ref = split_parameter_array(parameter_names)
_, internal_parameter_values = split_parameter_array(parameter_names,case["parameter_values"])
internal_parameter_values = np.asarray(internal_parameter_values)
internal_parameter_values_and_one = np.asarray(internal_parameter_values+[1])
atoms = aflow.build_atoms_from_prototype(**case)
species = case.pop("species")
equations, internal_parameter_names = aflow.get_equations_from_prototype(**case)
assert internal_parameter_names == internal_parameter_names_ref, "get_equations_from_prototype got incorrect internal parameter names"

assert internal_parameter_names == internal_parameter_names_ref, \
f"get_equations_from_prototype got incorrect internal parameter names, got {internal_parameter_names} expected {internal_parameter_names_ref}"
assert len(equations) == len(atoms), "get_equations_from_prototype got an incorrect number of equations"
for equation,atom in zip(equations,atoms):
assert equation["species"] == atom.symbol, "get_equations_from_prototype got incorrect species"
position_differences = np.matmul(equation["equation"],internal_parameter_values) - atom.scaled_position
for position_difference in position_differences:
assert (np.allclose(position_difference,-1.0) or np.allclose(position_difference,0.0) or np.allclose(position_difference,1.0)), \
"get_equations_from_prototype got incorrect position"

virtual_to_real_species_map = {}
for i,symbol in enumerate(species):
virtual_to_real_species_map[symbol]=chr(65+i)

diagnostics = f'Problem occurred in prototype {case["prototype_label"]}\nReference fractional positions and species:\n'
for atom in atoms:
for position in atom.scaled_position:
diagnostics += f'{position:8.4f}'
diagnostics += f' {virtual_to_real_species_map[atom.symbol]}\n'
diagnostics += '\nComputed fractional positions and species:\n'
for equation in equations:
scaled_position = np.matmul(equation["equations"],internal_parameter_values_and_one)
for position in scaled_position:
diagnostics += f'{position:8.4f}'
diagnostics += f' {equation["species"]}\n'

equation_matched = [False]*len(equations)
for atom in atoms:
found_match = False
for i,equation in enumerate(equations):
position_differences = np.matmul(equation["equations"],internal_parameter_values_and_one) - atom.scaled_position
if np.allclose(position_differences,np.rint(position_differences),atol=1e-5):
assert equation["species"] == virtual_to_real_species_map[atom.symbol], \
f"get_equations_from_prototype got incorrect species, got {equation['species']} expected {virtual_to_real_species_map[atom.symbol]}\n\n{diagnostics}"
assert not found_match, f"get_equations_from_prototype gave equations that matched the same atom twice!\n\n{diagnostics}"
assert not equation_matched[i], f"get_equations_from_prototype gave an equations that matched two different atoms!\n\n{diagnostics}"
found_match = True
equation_matched[i] = True
assert found_match, \
f"get_equations_from_prototype did not produce any equations that match position {atom.scaled_position}.\n\n{diagnostics}"

print(f'Successfully checked get_equations_from_prototype for label {case["prototype_label"]}')

if __name__ == '__main__':
test_aflow()
test_get_equations_from_prototype()
5 changes: 4 additions & 1 deletion tests/test_CrystalGenomeTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,7 @@ def test_cg():
list_of_crystal_descriptions = query_crystal_genome_structures("LJ_Shifted_Bernardes_1958LowCutoff_Ar__MO_720819638419_004",["Ar"],"A_hP2_194_c")
test(**list_of_crystal_descriptions[0])
assert len(test.property_instances) == 2
test.write_property_instances_to_file()
test.write_property_instances_to_file()

if __name__ == '__main__':
test_cg()
4 changes: 4 additions & 0 deletions tests/test_KIMTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,7 @@ def test_kimtest():

assert len(test.property_instances) == 6
test.write_property_instances_to_file()


if __name__ == '__main__':
test_kimtest()

0 comments on commit d001e5c

Please sign in to comment.