From 153f8895f56687604d6fead26acd1aaea06d788d Mon Sep 17 00:00:00 2001 From: Sebastiaan Huber Date: Tue, 8 Jan 2019 15:47:23 +0100 Subject: [PATCH 1/4] Fix bug in `CifData.has_partial_occupancies` for non-float values For `CifData` with values of the `_atom_site_occupancy` tag that cannot be parsed into a float, the `has_partial_occupancies` property would except. Here we properly check whether the value can be cast to a float after removing any parentheses that may be present. --- aiida/backends/tests/dataclasses.py | 32 +++++++++++++++++++++++++++++ aiida/orm/data/cif.py | 32 +++++++++++++++++------------ 2 files changed, 51 insertions(+), 13 deletions(-) diff --git a/aiida/backends/tests/dataclasses.py b/aiida/backends/tests/dataclasses.py index f6de508707..db4f220dbc 100644 --- a/aiida/backends/tests/dataclasses.py +++ b/aiida/backends/tests/dataclasses.py @@ -783,6 +783,38 @@ def test_set_file(self): self.assertNotEquals(f1, f2) + @unittest.skipIf(not has_pycifrw(), "Unable to import PyCifRW") + def test_has_partial_occupancies(self): + import tempfile + from aiida.orm.data.cif import CifData + + tests = [ + # Unreadable occupations should not count as a partial occupancy + ('O 0.5 0.5(1) 0.5 ?', False), + # The default epsilon for deviation of unity for an occupation to be considered partial is 1E-6 + ('O 0.5 0.5(1) 0.5 1.0(000000001)', False), + # Partial occupancies should be able to deal with parentheses in the value + ('O 0.5 0.5(1) 0.5 1.0(000132)', True), + # Partial occupancies should be able to deal with parentheses in the value + ('O 0.5 0.5(1) 0.5 0.9(0000132)', True), + ] + + for test_string, result in tests: + with tempfile.NamedTemporaryFile(mode='w+') as handle: + handle.write(""" + data_test + loop_ + _atom_site_label + _atom_site_fract_x + _atom_site_fract_y + _atom_site_fract_z + _atom_site_occupancy + {} + """.format(test_string)) + handle.flush() + cif = CifData(file=handle.name) + self.assertEqual(cif.has_partial_occupancies, result) + class TestKindValidSymbols(AiidaTestCase): """ diff --git a/aiida/orm/data/cif.py b/aiida/orm/data/cif.py index f722fd3c3c..76f25af600 100644 --- a/aiida/orm/data/cif.py +++ b/aiida/orm/data/cif.py @@ -725,26 +725,32 @@ def get_spacegroup_numbers(self): @property def has_partial_occupancies(self): """ - Check if there are float values in the atomic occupancies + Return if the cif data contains partial occupancies - :returns: True if there are partial occupancies, False otherwise + A partial occupancy is defined as site with an occupancy that differs from unity, within a precision of 1E-6 + + .. note: occupancies that cannot be parsed into a float are ignored + + :return: True if there are partial occupancies, False otherwise """ - epsilon = 1e-6 + import re + tag = '_atom_site_occupancy' + + epsilon = 1e-6 partial_occupancies = False + for datablock in self.values.keys(): if tag in self.values[datablock].keys(): - for site in self.values[datablock][tag]: - # find the float number in the string - bracket = site.find('(') - if bracket == -1: - # no bracket found - if abs(float(site) - 1) > epsilon: - partial_occupancies = True + for position in self.values[datablock][tag]: + try: + # First remove any parentheses to support value like 1.134(56) and then cast to float + occupancy = float(re.sub(r'[\(\)]', '', position)) + except ValueError: + pass else: - # bracket, cut string - if abs(float(site[0:bracket]) - 1) > epsilon: - partial_occupancies = True + if abs(occupancy - 1) > epsilon: + return True return partial_occupancies From 9cef27d03653228a29b078eadd2cae53be318575 Mon Sep 17 00:00:00 2001 From: Sebastiaan Huber Date: Tue, 8 Jan 2019 16:10:18 +0100 Subject: [PATCH 2/4] Consider `X` to be an unknown species in `CifData.has_unknown_species` The `CifData.has_unknown_species` return whether the chemical formula contains any unknown species, which were defined as those species that are not listed in `aiida.common.constants.elements`. This was intended to filter out those cif files that contained species that could not be parsed into a `StructureData` that could actually be used for a condensed matter calculation, since the presence of an unknown species would render that impossible. However, recently, the specie `X` was added to the internal dictionary of elements, breaking the method. Here we explicitly ignore `X` in the `has_unknown_species` method, returning its implementation to its original intention. --- aiida/backends/tests/dataclasses.py | 21 +++++++++++++++++++++ aiida/orm/data/cif.py | 12 +++++++----- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/aiida/backends/tests/dataclasses.py b/aiida/backends/tests/dataclasses.py index db4f220dbc..b58348feca 100644 --- a/aiida/backends/tests/dataclasses.py +++ b/aiida/backends/tests/dataclasses.py @@ -815,6 +815,27 @@ def test_has_partial_occupancies(self): cif = CifData(file=handle.name) self.assertEqual(cif.has_partial_occupancies, result) + @unittest.skipIf(not has_pycifrw(), "Unable to import PyCifRW") + def test_has_unknown_species(self): + import tempfile + from aiida.orm.data.cif import CifData + + tests = [ + ('H2 O', False), # No unknown species + ('OsAx', True), # Ax is an unknown specie + ('UX', True), # X counts as unknown specie despite being defined in aiida.common.constants.elements + ('', None), # If no chemical formula is defined, None should be returned + ] + + for formula, result in tests: + # Unreadable occupations should not count as a partial occupancy + with tempfile.NamedTemporaryFile(mode='w+') as handle: + formula_string = "_chemical_formula_sum '{}'".format(formula) if formula else '\n' + handle.write("""data_test\n{}\n""".format(formula_string)) + handle.flush() + cif = CifData(file=handle.name) + self.assertEqual(cif.has_unknown_species, result, formula_string) + class TestKindValidSymbols(AiidaTestCase): """ diff --git a/aiida/orm/data/cif.py b/aiida/orm/data/cif.py index 76f25af600..e90b4b1d1f 100644 --- a/aiida/orm/data/cif.py +++ b/aiida/orm/data/cif.py @@ -798,16 +798,18 @@ def has_atomic_sites(self): def has_unknown_species(self): """ Returns whether the cif contains atomic species that are not recognized by AiiDA. - The known species are taken from the elements dictionary in aiida.common.constants. - If any of the formula of the cif data contain species that are not in that elements - dictionary, the function will return True and False in all other cases. If there is - no formulae to be found, it will return None + + The known species are taken from the elements dictionary in `aiida.common.constants`, with the exception of + the "unknown" placeholder element with symbol 'X', as this could not be used to construct a real structure. + If any of the formula of the cif data contain species that are not in that elements dictionary, the function + will return True and False in all other cases. If there is no formulae to be found, it will return None :returns: True when there are unknown species in any of the formulae, False if not, None if no formula found """ from aiida.common.constants import elements - known_species = [element['symbol'] for element in elements.values()] + # Get all the elements known by AiiDA, excluding the "unknown" element with symbol 'X' + known_species = [element['symbol'] for element in elements.values() if element['symbol'] != 'X'] for formula in self.get_formulae(): From 84cdb0aba0c26ba0697017622bb67936a99949fc Mon Sep 17 00:00:00 2001 From: Sebastiaan Huber Date: Tue, 8 Jan 2019 16:40:57 +0100 Subject: [PATCH 3/4] Implement the `CifData.has_unknown_atomic_sites` property This property will verify that all coordinates of all atomic sites can be properly converted into float values. If just one coordinate cannot be successfully converted to a float, the cif is said to have unknown atomic sites, as it will not be able to be parsed into a usable `StructureData` object for further calculations. --- aiida/backends/tests/dataclasses.py | 23 ++++++++++++++-- aiida/orm/data/cif.py | 42 ++++++++++++++++++++++++++++- 2 files changed, 62 insertions(+), 3 deletions(-) diff --git a/aiida/backends/tests/dataclasses.py b/aiida/backends/tests/dataclasses.py index b58348feca..b2967d37e3 100644 --- a/aiida/backends/tests/dataclasses.py +++ b/aiida/backends/tests/dataclasses.py @@ -776,7 +776,7 @@ def test_set_file(self): # but it does not have a file with self.assertRaises(AttributeError): a.filename - #now it has + # now it has a.set_file(tmpf.name) a.parse() a.filename @@ -828,7 +828,6 @@ def test_has_unknown_species(self): ] for formula, result in tests: - # Unreadable occupations should not count as a partial occupancy with tempfile.NamedTemporaryFile(mode='w+') as handle: formula_string = "_chemical_formula_sum '{}'".format(formula) if formula else '\n' handle.write("""data_test\n{}\n""".format(formula_string)) @@ -836,6 +835,26 @@ def test_has_unknown_species(self): cif = CifData(file=handle.name) self.assertEqual(cif.has_unknown_species, result, formula_string) + @unittest.skipIf(not has_pycifrw(), "Unable to import PyCifRW") + def test_has_undefined_atomic_sites(self): + import tempfile + from aiida.orm.data.cif import CifData + + tests = [ + ('C 0.0 0.0 0.0', False), # Should return False because all sites have valid coordinates + ('C 0.0 0.0 ?', True), # Should return True because one site has an undefined coordinate + ('', True), # Should return True if no sites defined at all + ] + + for test_string, result in tests: + with tempfile.NamedTemporaryFile(mode='w+') as handle: + base = 'loop_\n_atom_site_label\n_atom_site_fract_x\n_atom_site_fract_y\n_atom_site_fract_z' + atomic_site_string = '{}\n{}'.format(base, test_string) if test_string else '' + handle.write("""data_test\n{}\n""".format(atomic_site_string)) + handle.flush() + cif = CifData(file=handle.name) + self.assertEqual(cif.has_undefined_atomic_sites, result) + class TestKindValidSymbols(AiidaTestCase): """ diff --git a/aiida/orm/data/cif.py b/aiida/orm/data/cif.py index e90b4b1d1f..77f0aac823 100644 --- a/aiida/orm/data/cif.py +++ b/aiida/orm/data/cif.py @@ -414,7 +414,7 @@ def parse_formula(formula): return contents -# pylint: disable=abstract-method +# pylint: disable=abstract-method,too-many-public-methods # Note: Method 'query' is abstract in class 'Node' but is not overridden class CifData(SinglefileData): """ @@ -771,6 +771,46 @@ def has_attached_hydrogens(self): return False + @property + def has_undefined_atomic_sites(self): + """ + Return whether the cif data contains any undefined atomic sites. + + An undefined atomic site is defined as a site where at least one of the fractional coordinates specified in the + `_atom_site_fract_*` tags, cannot be successfully interpreted as a float. If the cif data contains any site that + matches this description, or it does not contain any atomic site tags at all, the cif data is said to have + undefined atomic sites. + + :return: boolean, True if no atomic sites are defined or if any of the defined sites contain undefined positions + and False otherwise + """ + import re + + tag_x = '_atom_site_fract_x' + tag_y = '_atom_site_fract_y' + tag_z = '_atom_site_fract_z' + + # Some CifData files do not even contain a single `_atom_site_fract_*` tag + has_tags = False + + for datablock in self.values.keys(): + for tag in [tag_x, tag_y, tag_z]: + if tag in self.values[datablock].keys(): + for position in self.values[datablock][tag]: + + # The CifData contains at least one `_atom_site_fract_*` tag + has_tags = True + + try: + # First remove any parentheses to support value like 1.134(56) and then cast to float + float(re.sub(r'[\(\)]', '', position)) + except ValueError: + # Position cannot be converted to a float value, so we have undefined atomic sites + return True + + # At this point the file either has no tags at all, or it does and all coordinates were valid floats + return not has_tags + @property def has_atomic_sites(self): """ From 23895b275c9a9b305fdb72fb096e3957d6b09547 Mon Sep 17 00:00:00 2001 From: Sebastiaan Huber Date: Wed, 9 Jan 2019 16:33:13 +0100 Subject: [PATCH 4/4] Homogenize use of boolean properties for `Kind` and `StructureData` The `Kind` and `StructureData` were using an inconsisten mixture of methods and properties for boolean attributes, such as `has_vacancies` and `is_alloy`. To conform to `CifData` these are now all turned into properties. --- aiida/backends/tests/dataclasses.py | 56 ++++++++++----------- aiida/orm/data/array/trajectory.py | 7 +-- aiida/orm/data/structure.py | 53 +++++++------------ docs/source/examples/structure_tutorial.rst | 4 +- 4 files changed, 49 insertions(+), 71 deletions(-) diff --git a/aiida/backends/tests/dataclasses.py b/aiida/backends/tests/dataclasses.py index b2967d37e3..9e25c4174c 100644 --- a/aiida/backends/tests/dataclasses.py +++ b/aiida/backends/tests/dataclasses.py @@ -989,8 +989,8 @@ def test_sum_one_general(self): from aiida.orm.data.structure import Kind a = Kind(symbols=['Ba', 'C'], weights=[1. / 3., 2. / 3.]) - self.assertTrue(a.is_alloy()) - self.assertFalse(a.has_vacancies()) + self.assertTrue(a.is_alloy) + self.assertFalse(a.has_vacancies) def test_sum_less_one_general(self): """ @@ -999,8 +999,8 @@ def test_sum_less_one_general(self): from aiida.orm.data.structure import Kind a = Kind(symbols=['Ba', 'C'], weights=[1. / 3., 1. / 3.]) - self.assertTrue(a.is_alloy()) - self.assertTrue(a.has_vacancies()) + self.assertTrue(a.is_alloy) + self.assertTrue(a.has_vacancies) def test_no_position(self): """ @@ -1018,16 +1018,16 @@ def test_simple(self): from aiida.orm.data.structure import Kind a = Kind(symbols='Ba') - self.assertFalse(a.is_alloy()) - self.assertFalse(a.has_vacancies()) + self.assertFalse(a.is_alloy) + self.assertFalse(a.has_vacancies) b = Kind(symbols='Ba', weights=1.) - self.assertFalse(b.is_alloy()) - self.assertFalse(b.has_vacancies()) + self.assertFalse(b.is_alloy) + self.assertFalse(b.has_vacancies) c = Kind(symbols='Ba', weights=None) - self.assertFalse(c.is_alloy()) - self.assertFalse(c.has_vacancies()) + self.assertFalse(c.is_alloy) + self.assertFalse(c.has_vacancies) def test_automatic_name(self): """ @@ -1253,24 +1253,24 @@ def test_cell_ok_and_atoms(self): a.append_atom(position=(0., 0., 0.), symbols=['Ba']) a.append_atom(position=(1., 1., 1.), symbols=['Ti']) a.append_atom(position=(1.2, 1.4, 1.6), symbols=['Ti']) - self.assertFalse(a.is_alloy()) - self.assertFalse(a.has_vacancies()) + self.assertFalse(a.is_alloy) + self.assertFalse(a.has_vacancies) # There should be only two kinds! (two atoms of kind Ti should # belong to the same kind) self.assertEquals(len(a.kinds), 2) a.append_atom(position=(0.5, 1., 1.5), symbols=['O', 'C'], weights=[0.5, 0.5]) - self.assertTrue(a.is_alloy()) - self.assertFalse(a.has_vacancies()) + self.assertTrue(a.is_alloy) + self.assertFalse(a.has_vacancies) a.append_atom(position=(0.5, 1., 1.5), symbols=['O'], weights=[0.5]) - self.assertTrue(a.is_alloy()) - self.assertTrue(a.has_vacancies()) + self.assertTrue(a.is_alloy) + self.assertTrue(a.has_vacancies) a.clear_kinds() a.append_atom(position=(0.5, 1., 1.5), symbols=['O'], weights=[0.5]) - self.assertFalse(a.is_alloy()) - self.assertTrue(a.has_vacancies()) + self.assertFalse(a.is_alloy) + self.assertTrue(a.has_vacancies) def test_cell_ok_and_unknown_atoms(self): """ @@ -1288,24 +1288,24 @@ def test_cell_ok_and_unknown_atoms(self): a.append_atom(position=(0., 0., 0.), symbols=['Ba']) a.append_atom(position=(1., 1., 1.), symbols=['X']) a.append_atom(position=(1.2, 1.4, 1.6), symbols=['X']) - self.assertFalse(a.is_alloy()) - self.assertFalse(a.has_vacancies()) + self.assertFalse(a.is_alloy) + self.assertFalse(a.has_vacancies) # There should be only two kinds! (two atoms of kind X should # belong to the same kind) self.assertEquals(len(a.kinds), 2) a.append_atom(position=(0.5, 1., 1.5), symbols=['O', 'C'], weights=[0.5, 0.5]) - self.assertTrue(a.is_alloy()) - self.assertFalse(a.has_vacancies()) + self.assertTrue(a.is_alloy) + self.assertFalse(a.has_vacancies) a.append_atom(position=(0.5, 1., 1.5), symbols=['O'], weights=[0.5]) - self.assertTrue(a.is_alloy()) - self.assertTrue(a.has_vacancies()) + self.assertTrue(a.is_alloy) + self.assertTrue(a.has_vacancies) a.clear_kinds() a.append_atom(position=(0.5, 1., 1.5), symbols=['X'], weights=[0.5]) - self.assertFalse(a.is_alloy()) - self.assertTrue(a.has_vacancies()) + self.assertFalse(a.is_alloy) + self.assertTrue(a.has_vacancies) def test_kind_1(self): """ @@ -2043,8 +2043,8 @@ def test_lock(self): a.pbc = [True, True, True] _ = a.get_cell_volume() - _ = a.is_alloy() - _ = a.has_vacancies() + _ = a.is_alloy + _ = a.has_vacancies b = a.clone() # I check that clone returned an unstored copy and so can be altered diff --git a/aiida/orm/data/array/trajectory.py b/aiida/orm/data/array/trajectory.py index 1b4b0d62db..a4971b248f 100644 --- a/aiida/orm/data/array/trajectory.py +++ b/aiida/orm/data/array/trajectory.py @@ -403,7 +403,7 @@ def _prepare_xsf(self, index=None, main_file_name=""): # pylint: disable=unused return_string = "ANIMSTEPS {}\nCRYSTAL\n".format(len(indices)) # Do the checks once and for all here: structure = self.get_step_structure(index=0) - if structure.is_alloy() or structure.has_vacancies(): + if structure.is_alloy or structure.has_vacancies: raise NotImplementedError("XSF for alloys or systems with vacancies not implemented.") cells = self.get_cells() positions = self.get_positions() @@ -413,11 +413,6 @@ def _prepare_xsf(self, index=None, main_file_name=""): # pylint: disable=unused for idx in indices: return_string += "PRIMVEC {}\n".format(idx + 1) - #~ structure = self.get_step_structure(index=idx) - #~ sites = structure.sites - #~ if structure.is_alloy() or structure.has_vacancies(): - #~ raise NotImplementedError("XSF for alloys or systems with " - #~ "vacancies not implemented.") for cell_vector in cells[idx]: return_string += ' '.join(["{:18.5f}".format(i) for i in cell_vector]) return_string += "\n" diff --git a/aiida/orm/data/structure.py b/aiida/orm/data/structure.py index 2c0500592f..30c1ada369 100644 --- a/aiida/orm/data/structure.py +++ b/aiida/orm/data/structure.py @@ -997,7 +997,7 @@ def _prepare_xsf(self, main_file_name=""): """ Write the given structure to a string of format XSF (for XCrySDen). """ - if self.is_alloy() or self.has_vacancies(): + if self.is_alloy or self.has_vacancies: raise NotImplementedError("XSF for alloys or systems with " "vacancies not implemented.") @@ -1111,7 +1111,7 @@ def _prepare_xyz(self, main_file_name=""): """ Write the given structure to a string of format XYZ. """ - if self.is_alloy() or self.has_vacancies(): + if self.is_alloy or self.has_vacancies: raise NotImplementedError("XYZ for alloys or systems with " "vacancies not implemented.") @@ -1857,21 +1857,21 @@ def cell_angles(self, value): def set_cell_angles(self, value): raise NotImplementedError("Modification is not implemented yet") + @property def is_alloy(self): - """ - To understand if there are alloys in the structure. + """Return whether the structure contains any alloy kinds. :return: a boolean, True if at least one kind is an alloy """ - return any(s.is_alloy() for s in self.kinds) + return any(kind.is_alloy for kind in self.kinds) + @property def has_vacancies(self): - """ - To understand if there are vacancies in the structure. + """Return whether the structure has vacancies in the structure. :return: a boolean, True if at least one kind has a vacancy """ - return any(s.has_vacancies() for s in self.kinds) + return any(kind.has_vacancies for kind in self.kinds) def get_cell_volume(self): """ @@ -2186,23 +2186,6 @@ def get_raw(self): 'name': self.name, } - # def get_ase(self): - - # """ - # Return a ase.Atom object for this kind, setting the position to - # the origin. - # - # Note: If any site is an alloy or has vacancies, a ValueError is - # raised (from the site.get_ase() routine). - # """ - # import ase - # if self.is_alloy() or self.has_vacancies(): - # raise ValueError("Cannot convert to ASE if the site is an alloy " - # "or has vacancies.") - # aseatom = ase.Atom(position=[0.,0.,0.], symbol=self.symbols[0], - # mass=self.mass) - # return aseatom - def reset_mass(self): """ Reset the mass to the automatic calculated value. @@ -2421,21 +2404,21 @@ def set_symbols_and_weights(self, symbols, weights): self._symbols = symbols_tuple self._weights = weights_tuple + @property def is_alloy(self): - """ - To understand if kind is an alloy. + """Return whether the Kind is an alloy, i.e. contains more than one element - :return: True if the kind has more than one element (i.e., - len(self.symbols) != 1), False otherwise. + :return: boolean, True if the kind has more than one element, False otherwise. """ return len(self._symbols) != 1 + @property def has_vacancies(self): - """ - Returns True if the sum of the weights is less than one. - It uses the internal variable _sum_threshold as a threshold. + """Return whether the Kind contains vacancies, i.e. when the sum of the weights is less than one. + + .. note:: the property uses the internal variable `_sum_threshold` as a threshold. - :return: a boolean + :return: boolean, True if the sum of the weights is less than one, False otherwise """ return has_vacancies(self._weights) @@ -2530,7 +2513,7 @@ def get_ase(self, kinds): used_tags = defaultdict(list) for k in kinds: # Skip alloys and vacancies - if k.is_alloy() or k.has_vacancies(): + if k.is_alloy or k.has_vacancies: tag_list.append(None) # If the kind name is equal to the specie name, # then no tag should be set @@ -2575,7 +2558,7 @@ def get_ase(self, kinds): raise ValueError("No kind '{}' has been found in the list of kinds" "".format(self.kind_name)) - if kind.is_alloy() or kind.has_vacancies(): + if kind.is_alloy or kind.has_vacancies: raise ValueError("Cannot convert to ASE if the kind represents " "an alloy or it has vacancies.") aseatom = ase.Atom(position=self.position, diff --git a/docs/source/examples/structure_tutorial.rst b/docs/source/examples/structure_tutorial.rst index b81b81fb9e..a20010759b 100644 --- a/docs/source/examples/structure_tutorial.rst +++ b/docs/source/examples/structure_tutorial.rst @@ -80,11 +80,11 @@ The following line instead:: would create a site with 90% probability of being occupied by Calcium, and 10% of being a vacancy. -Utility methods ``s.is_alloy()`` and ``s.has_vacancies()`` can be used to +Utility methods ``s.is_alloy`` and ``s.has_vacancies`` can be used to verify, respectively, if more than one element if given in the symbols list, and if the sum of all weights is smaller than one. -.. note:: if you pass more than one symbol, the method ``s.is_alloy()`` will +.. note:: if you pass more than one symbol, the method ``s.is_alloy`` will always return ``True``, even if only one symbol has occupancy 1. and all others have occupancy zero::