Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Various bug and consistency fixes for CifData and StructureData #2374

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 101 additions & 29 deletions aiida/backends/tests/dataclasses.py
Original file line number Diff line number Diff line change
Expand Up @@ -776,13 +776,85 @@ def test_set_file(self):
# but it does not have a file
with self.assertRaises(AttributeError):
a.filename
#now it has
# now it has
a.set_file(tmpf.name)
a.parse()
a.filename

self.assertNotEquals(f1, f2)

@unittest.skipIf(not has_pycifrw(), "Unable to import PyCifRW")
def test_has_partial_occupancies(self):
import tempfile
from aiida.orm.data.cif import CifData

tests = [
# Unreadable occupations should not count as a partial occupancy
('O 0.5 0.5(1) 0.5 ?', False),
# The default epsilon for deviation of unity for an occupation to be considered partial is 1E-6
('O 0.5 0.5(1) 0.5 1.0(000000001)', False),
# Partial occupancies should be able to deal with parentheses in the value
('O 0.5 0.5(1) 0.5 1.0(000132)', True),
# Partial occupancies should be able to deal with parentheses in the value
('O 0.5 0.5(1) 0.5 0.9(0000132)', True),
]

for test_string, result in tests:
with tempfile.NamedTemporaryFile(mode='w+') as handle:
handle.write("""
data_test
loop_
_atom_site_label
_atom_site_fract_x
_atom_site_fract_y
_atom_site_fract_z
_atom_site_occupancy
{}
""".format(test_string))
handle.flush()
cif = CifData(file=handle.name)
self.assertEqual(cif.has_partial_occupancies, result)

@unittest.skipIf(not has_pycifrw(), "Unable to import PyCifRW")
def test_has_unknown_species(self):
import tempfile
from aiida.orm.data.cif import CifData

tests = [
('H2 O', False), # No unknown species
('OsAx', True), # Ax is an unknown specie
('UX', True), # X counts as unknown specie despite being defined in aiida.common.constants.elements
('', None), # If no chemical formula is defined, None should be returned
]

for formula, result in tests:
with tempfile.NamedTemporaryFile(mode='w+') as handle:
formula_string = "_chemical_formula_sum '{}'".format(formula) if formula else '\n'
handle.write("""data_test\n{}\n""".format(formula_string))
handle.flush()
cif = CifData(file=handle.name)
self.assertEqual(cif.has_unknown_species, result, formula_string)

@unittest.skipIf(not has_pycifrw(), "Unable to import PyCifRW")
def test_has_undefined_atomic_sites(self):
import tempfile
from aiida.orm.data.cif import CifData

tests = [
('C 0.0 0.0 0.0', False), # Should return False because all sites have valid coordinates
('C 0.0 0.0 ?', True), # Should return True because one site has an undefined coordinate
('', True), # Should return True if no sites defined at all
]

for test_string, result in tests:
with tempfile.NamedTemporaryFile(mode='w+') as handle:
base = 'loop_\n_atom_site_label\n_atom_site_fract_x\n_atom_site_fract_y\n_atom_site_fract_z'
atomic_site_string = '{}\n{}'.format(base, test_string) if test_string else ''
handle.write("""data_test\n{}\n""".format(atomic_site_string))
handle.flush()
cif = CifData(file=handle.name)
self.assertEqual(cif.has_undefined_atomic_sites, result)


class TestKindValidSymbols(AiidaTestCase):
"""
Expand Down Expand Up @@ -917,8 +989,8 @@ def test_sum_one_general(self):
from aiida.orm.data.structure import Kind

a = Kind(symbols=['Ba', 'C'], weights=[1. / 3., 2. / 3.])
self.assertTrue(a.is_alloy())
self.assertFalse(a.has_vacancies())
self.assertTrue(a.is_alloy)
self.assertFalse(a.has_vacancies)

def test_sum_less_one_general(self):
"""
Expand All @@ -927,8 +999,8 @@ def test_sum_less_one_general(self):
from aiida.orm.data.structure import Kind

a = Kind(symbols=['Ba', 'C'], weights=[1. / 3., 1. / 3.])
self.assertTrue(a.is_alloy())
self.assertTrue(a.has_vacancies())
self.assertTrue(a.is_alloy)
self.assertTrue(a.has_vacancies)

def test_no_position(self):
"""
Expand All @@ -946,16 +1018,16 @@ def test_simple(self):
from aiida.orm.data.structure import Kind

a = Kind(symbols='Ba')
self.assertFalse(a.is_alloy())
self.assertFalse(a.has_vacancies())
self.assertFalse(a.is_alloy)
self.assertFalse(a.has_vacancies)

b = Kind(symbols='Ba', weights=1.)
self.assertFalse(b.is_alloy())
self.assertFalse(b.has_vacancies())
self.assertFalse(b.is_alloy)
self.assertFalse(b.has_vacancies)

c = Kind(symbols='Ba', weights=None)
self.assertFalse(c.is_alloy())
self.assertFalse(c.has_vacancies())
self.assertFalse(c.is_alloy)
self.assertFalse(c.has_vacancies)

def test_automatic_name(self):
"""
Expand Down Expand Up @@ -1181,24 +1253,24 @@ def test_cell_ok_and_atoms(self):
a.append_atom(position=(0., 0., 0.), symbols=['Ba'])
a.append_atom(position=(1., 1., 1.), symbols=['Ti'])
a.append_atom(position=(1.2, 1.4, 1.6), symbols=['Ti'])
self.assertFalse(a.is_alloy())
self.assertFalse(a.has_vacancies())
self.assertFalse(a.is_alloy)
self.assertFalse(a.has_vacancies)
# There should be only two kinds! (two atoms of kind Ti should
# belong to the same kind)
self.assertEquals(len(a.kinds), 2)

a.append_atom(position=(0.5, 1., 1.5), symbols=['O', 'C'], weights=[0.5, 0.5])
self.assertTrue(a.is_alloy())
self.assertFalse(a.has_vacancies())
self.assertTrue(a.is_alloy)
self.assertFalse(a.has_vacancies)

a.append_atom(position=(0.5, 1., 1.5), symbols=['O'], weights=[0.5])
self.assertTrue(a.is_alloy())
self.assertTrue(a.has_vacancies())
self.assertTrue(a.is_alloy)
self.assertTrue(a.has_vacancies)

a.clear_kinds()
a.append_atom(position=(0.5, 1., 1.5), symbols=['O'], weights=[0.5])
self.assertFalse(a.is_alloy())
self.assertTrue(a.has_vacancies())
self.assertFalse(a.is_alloy)
self.assertTrue(a.has_vacancies)

def test_cell_ok_and_unknown_atoms(self):
"""
Expand All @@ -1216,24 +1288,24 @@ def test_cell_ok_and_unknown_atoms(self):
a.append_atom(position=(0., 0., 0.), symbols=['Ba'])
a.append_atom(position=(1., 1., 1.), symbols=['X'])
a.append_atom(position=(1.2, 1.4, 1.6), symbols=['X'])
self.assertFalse(a.is_alloy())
self.assertFalse(a.has_vacancies())
self.assertFalse(a.is_alloy)
self.assertFalse(a.has_vacancies)
# There should be only two kinds! (two atoms of kind X should
# belong to the same kind)
self.assertEquals(len(a.kinds), 2)

a.append_atom(position=(0.5, 1., 1.5), symbols=['O', 'C'], weights=[0.5, 0.5])
self.assertTrue(a.is_alloy())
self.assertFalse(a.has_vacancies())
self.assertTrue(a.is_alloy)
self.assertFalse(a.has_vacancies)

a.append_atom(position=(0.5, 1., 1.5), symbols=['O'], weights=[0.5])
self.assertTrue(a.is_alloy())
self.assertTrue(a.has_vacancies())
self.assertTrue(a.is_alloy)
self.assertTrue(a.has_vacancies)

a.clear_kinds()
a.append_atom(position=(0.5, 1., 1.5), symbols=['X'], weights=[0.5])
self.assertFalse(a.is_alloy())
self.assertTrue(a.has_vacancies())
self.assertFalse(a.is_alloy)
self.assertTrue(a.has_vacancies)

def test_kind_1(self):
"""
Expand Down Expand Up @@ -1971,8 +2043,8 @@ def test_lock(self):
a.pbc = [True, True, True]

_ = a.get_cell_volume()
_ = a.is_alloy()
_ = a.has_vacancies()
_ = a.is_alloy
_ = a.has_vacancies

b = a.clone()
# I check that clone returned an unstored copy and so can be altered
Expand Down
7 changes: 1 addition & 6 deletions aiida/orm/data/array/trajectory.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,7 @@ def _prepare_xsf(self, index=None, main_file_name=""): # pylint: disable=unused
return_string = "ANIMSTEPS {}\nCRYSTAL\n".format(len(indices))
# Do the checks once and for all here:
structure = self.get_step_structure(index=0)
if structure.is_alloy() or structure.has_vacancies():
if structure.is_alloy or structure.has_vacancies:
raise NotImplementedError("XSF for alloys or systems with vacancies not implemented.")
cells = self.get_cells()
positions = self.get_positions()
Expand All @@ -413,11 +413,6 @@ def _prepare_xsf(self, index=None, main_file_name=""): # pylint: disable=unused

for idx in indices:
return_string += "PRIMVEC {}\n".format(idx + 1)
#~ structure = self.get_step_structure(index=idx)
#~ sites = structure.sites
#~ if structure.is_alloy() or structure.has_vacancies():
#~ raise NotImplementedError("XSF for alloys or systems with "
#~ "vacancies not implemented.")
for cell_vector in cells[idx]:
return_string += ' '.join(["{:18.5f}".format(i) for i in cell_vector])
return_string += "\n"
Expand Down
86 changes: 67 additions & 19 deletions aiida/orm/data/cif.py
Original file line number Diff line number Diff line change
Expand Up @@ -414,7 +414,7 @@ def parse_formula(formula):
return contents


# pylint: disable=abstract-method
# pylint: disable=abstract-method,too-many-public-methods
# Note: Method 'query' is abstract in class 'Node' but is not overridden
class CifData(SinglefileData):
"""
Expand Down Expand Up @@ -725,26 +725,32 @@ def get_spacegroup_numbers(self):
@property
def has_partial_occupancies(self):
"""
Check if there are float values in the atomic occupancies
Return if the cif data contains partial occupancies

:returns: True if there are partial occupancies, False otherwise
A partial occupancy is defined as site with an occupancy that differs from unity, within a precision of 1E-6

.. note: occupancies that cannot be parsed into a float are ignored

:return: True if there are partial occupancies, False otherwise
"""
epsilon = 1e-6
import re

tag = '_atom_site_occupancy'

epsilon = 1e-6
partial_occupancies = False

for datablock in self.values.keys():
if tag in self.values[datablock].keys():
for site in self.values[datablock][tag]:
# find the float number in the string
bracket = site.find('(')
if bracket == -1:
# no bracket found
if abs(float(site) - 1) > epsilon:
partial_occupancies = True
for position in self.values[datablock][tag]:
try:
# First remove any parentheses to support value like 1.134(56) and then cast to float
occupancy = float(re.sub(r'[\(\)]', '', position))
except ValueError:
pass
else:
# bracket, cut string
if abs(float(site[0:bracket]) - 1) > epsilon:
partial_occupancies = True
if abs(occupancy - 1) > epsilon:
return True

return partial_occupancies

Expand All @@ -765,6 +771,46 @@ def has_attached_hydrogens(self):

return False

@property
def has_undefined_atomic_sites(self):
"""
Return whether the cif data contains any undefined atomic sites.

An undefined atomic site is defined as a site where at least one of the fractional coordinates specified in the
`_atom_site_fract_*` tags, cannot be successfully interpreted as a float. If the cif data contains any site that
matches this description, or it does not contain any atomic site tags at all, the cif data is said to have
undefined atomic sites.

:return: boolean, True if no atomic sites are defined or if any of the defined sites contain undefined positions
and False otherwise
"""
import re

tag_x = '_atom_site_fract_x'
tag_y = '_atom_site_fract_y'
tag_z = '_atom_site_fract_z'

# Some CifData files do not even contain a single `_atom_site_fract_*` tag
has_tags = False

for datablock in self.values.keys():
for tag in [tag_x, tag_y, tag_z]:
if tag in self.values[datablock].keys():
for position in self.values[datablock][tag]:

# The CifData contains at least one `_atom_site_fract_*` tag
has_tags = True

try:
# First remove any parentheses to support value like 1.134(56) and then cast to float
float(re.sub(r'[\(\)]', '', position))
except ValueError:
# Position cannot be converted to a float value, so we have undefined atomic sites
return True

# At this point the file either has no tags at all, or it does and all coordinates were valid floats
return not has_tags

@property
def has_atomic_sites(self):
"""
Expand Down Expand Up @@ -792,16 +838,18 @@ def has_atomic_sites(self):
def has_unknown_species(self):
"""
Returns whether the cif contains atomic species that are not recognized by AiiDA.
The known species are taken from the elements dictionary in aiida.common.constants.
If any of the formula of the cif data contain species that are not in that elements
dictionary, the function will return True and False in all other cases. If there is
no formulae to be found, it will return None

The known species are taken from the elements dictionary in `aiida.common.constants`, with the exception of
the "unknown" placeholder element with symbol 'X', as this could not be used to construct a real structure.
If any of the formula of the cif data contain species that are not in that elements dictionary, the function
will return True and False in all other cases. If there is no formulae to be found, it will return None

:returns: True when there are unknown species in any of the formulae, False if not, None if no formula found
"""
from aiida.common.constants import elements

known_species = [element['symbol'] for element in elements.values()]
# Get all the elements known by AiiDA, excluding the "unknown" element with symbol 'X'
known_species = [element['symbol'] for element in elements.values() if element['symbol'] != 'X']

for formula in self.get_formulae():

Expand Down
Loading