aiidateam · sphuber · Jan 9, 2019 · Jan 8, 2019 · Jan 8, 2019 · Jan 8, 2019
diff --git a/aiida/backends/tests/dataclasses.py b/aiida/backends/tests/dataclasses.py
@@ -776,13 +776,85 @@ def test_set_file(self):
             # but it does not have a file
             with self.assertRaises(AttributeError):
                 a.filename
-            #now it has
+            # now it has
             a.set_file(tmpf.name)
             a.parse()
             a.filename
 
         self.assertNotEquals(f1, f2)
 
+    @unittest.skipIf(not has_pycifrw(), "Unable to import PyCifRW")
+    def test_has_partial_occupancies(self):
+        import tempfile
+        from aiida.orm.data.cif import CifData
+
+        tests = [
+            # Unreadable occupations should not count as a partial occupancy
+            ('O 0.5 0.5(1) 0.5 ?', False),
+            # The default epsilon for deviation of unity for an occupation to be considered partial is 1E-6
+            ('O 0.5 0.5(1) 0.5 1.0(000000001)', False),
+            # Partial occupancies should be able to deal with parentheses in the value
+            ('O 0.5 0.5(1) 0.5 1.0(000132)', True),
+            # Partial occupancies should be able to deal with parentheses in the value
+            ('O 0.5 0.5(1) 0.5 0.9(0000132)', True),
+        ]
+
+        for test_string, result in tests:
+            with tempfile.NamedTemporaryFile(mode='w+') as handle:
+                handle.write("""
+                    data_test
+                    loop_
+                    _atom_site_label
+                    _atom_site_fract_x
+                    _atom_site_fract_y
+                    _atom_site_fract_z
+                    _atom_site_occupancy
+                    {}
+                """.format(test_string))
+                handle.flush()
+                cif = CifData(file=handle.name)
+                self.assertEqual(cif.has_partial_occupancies, result)
+
+    @unittest.skipIf(not has_pycifrw(), "Unable to import PyCifRW")
+    def test_has_unknown_species(self):
+        import tempfile
+        from aiida.orm.data.cif import CifData
+
+        tests = [
+            ('H2 O', False),  # No unknown species
+            ('OsAx', True),   # Ax is an unknown specie
+            ('UX', True),     # X counts as unknown specie despite being defined in aiida.common.constants.elements
+            ('', None),       # If no chemical formula is defined, None should be returned
+        ]
+
+        for formula, result in tests:
+            with tempfile.NamedTemporaryFile(mode='w+') as handle:
+                formula_string = "_chemical_formula_sum '{}'".format(formula) if formula else '\n'
+                handle.write("""data_test\n{}\n""".format(formula_string))
+                handle.flush()
+                cif = CifData(file=handle.name)
+                self.assertEqual(cif.has_unknown_species, result, formula_string)
+
+    @unittest.skipIf(not has_pycifrw(), "Unable to import PyCifRW")
+    def test_has_undefined_atomic_sites(self):
+        import tempfile
+        from aiida.orm.data.cif import CifData
+
+        tests = [
+            ('C 0.0 0.0 0.0', False),  # Should return False because all sites have valid coordinates
+            ('C 0.0 0.0 ?', True),     # Should return True because one site has an undefined coordinate
+            ('', True),                # Should return True if no sites defined at all
+        ]
+
+        for test_string, result in tests:
+            with tempfile.NamedTemporaryFile(mode='w+') as handle:
+                base = 'loop_\n_atom_site_label\n_atom_site_fract_x\n_atom_site_fract_y\n_atom_site_fract_z'
+                atomic_site_string = '{}\n{}'.format(base, test_string) if test_string else ''
+                handle.write("""data_test\n{}\n""".format(atomic_site_string))
+                handle.flush()
+                cif = CifData(file=handle.name)
+                self.assertEqual(cif.has_undefined_atomic_sites, result)
+
 
 class TestKindValidSymbols(AiidaTestCase):
     """
@@ -917,8 +989,8 @@ def test_sum_one_general(self):
         from aiida.orm.data.structure import Kind
 
         a = Kind(symbols=['Ba', 'C'], weights=[1. / 3., 2. / 3.])
-        self.assertTrue(a.is_alloy())
-        self.assertFalse(a.has_vacancies())
+        self.assertTrue(a.is_alloy)
+        self.assertFalse(a.has_vacancies)
 
     def test_sum_less_one_general(self):
         """
@@ -927,8 +999,8 @@ def test_sum_less_one_general(self):
         from aiida.orm.data.structure import Kind
 
         a = Kind(symbols=['Ba', 'C'], weights=[1. / 3., 1. / 3.])
-        self.assertTrue(a.is_alloy())
-        self.assertTrue(a.has_vacancies())
+        self.assertTrue(a.is_alloy)
+        self.assertTrue(a.has_vacancies)
 
     def test_no_position(self):
         """
@@ -946,16 +1018,16 @@ def test_simple(self):
         from aiida.orm.data.structure import Kind
 
         a = Kind(symbols='Ba')
-        self.assertFalse(a.is_alloy())
-        self.assertFalse(a.has_vacancies())
+        self.assertFalse(a.is_alloy)
+        self.assertFalse(a.has_vacancies)
 
         b = Kind(symbols='Ba', weights=1.)
-        self.assertFalse(b.is_alloy())
-        self.assertFalse(b.has_vacancies())
+        self.assertFalse(b.is_alloy)
+        self.assertFalse(b.has_vacancies)
 
         c = Kind(symbols='Ba', weights=None)
-        self.assertFalse(c.is_alloy())
-        self.assertFalse(c.has_vacancies())
+        self.assertFalse(c.is_alloy)
+        self.assertFalse(c.has_vacancies)
 
     def test_automatic_name(self):
         """
@@ -1181,24 +1253,24 @@ def test_cell_ok_and_atoms(self):
         a.append_atom(position=(0., 0., 0.), symbols=['Ba'])
         a.append_atom(position=(1., 1., 1.), symbols=['Ti'])
         a.append_atom(position=(1.2, 1.4, 1.6), symbols=['Ti'])
-        self.assertFalse(a.is_alloy())
-        self.assertFalse(a.has_vacancies())
+        self.assertFalse(a.is_alloy)
+        self.assertFalse(a.has_vacancies)
         # There should be only two kinds! (two atoms of kind Ti should
         # belong to the same kind)
         self.assertEquals(len(a.kinds), 2)
 
         a.append_atom(position=(0.5, 1., 1.5), symbols=['O', 'C'], weights=[0.5, 0.5])
-        self.assertTrue(a.is_alloy())
-        self.assertFalse(a.has_vacancies())
+        self.assertTrue(a.is_alloy)
+        self.assertFalse(a.has_vacancies)
 
         a.append_atom(position=(0.5, 1., 1.5), symbols=['O'], weights=[0.5])
-        self.assertTrue(a.is_alloy())
-        self.assertTrue(a.has_vacancies())
+        self.assertTrue(a.is_alloy)
+        self.assertTrue(a.has_vacancies)
 
         a.clear_kinds()
         a.append_atom(position=(0.5, 1., 1.5), symbols=['O'], weights=[0.5])
-        self.assertFalse(a.is_alloy())
-        self.assertTrue(a.has_vacancies())
+        self.assertFalse(a.is_alloy)
+        self.assertTrue(a.has_vacancies)
 
     def test_cell_ok_and_unknown_atoms(self):
         """
@@ -1216,24 +1288,24 @@ def test_cell_ok_and_unknown_atoms(self):
         a.append_atom(position=(0., 0., 0.), symbols=['Ba'])
         a.append_atom(position=(1., 1., 1.), symbols=['X'])
         a.append_atom(position=(1.2, 1.4, 1.6), symbols=['X'])
-        self.assertFalse(a.is_alloy())
-        self.assertFalse(a.has_vacancies())
+        self.assertFalse(a.is_alloy)
+        self.assertFalse(a.has_vacancies)
         # There should be only two kinds! (two atoms of kind X should
         # belong to the same kind)
         self.assertEquals(len(a.kinds), 2)
 
         a.append_atom(position=(0.5, 1., 1.5), symbols=['O', 'C'], weights=[0.5, 0.5])
-        self.assertTrue(a.is_alloy())
-        self.assertFalse(a.has_vacancies())
+        self.assertTrue(a.is_alloy)
+        self.assertFalse(a.has_vacancies)
 
         a.append_atom(position=(0.5, 1., 1.5), symbols=['O'], weights=[0.5])
-        self.assertTrue(a.is_alloy())
-        self.assertTrue(a.has_vacancies())
+        self.assertTrue(a.is_alloy)
+        self.assertTrue(a.has_vacancies)
 
         a.clear_kinds()
         a.append_atom(position=(0.5, 1., 1.5), symbols=['X'], weights=[0.5])
-        self.assertFalse(a.is_alloy())
-        self.assertTrue(a.has_vacancies())
+        self.assertFalse(a.is_alloy)
+        self.assertTrue(a.has_vacancies)
 
     def test_kind_1(self):
         """
@@ -1971,8 +2043,8 @@ def test_lock(self):
             a.pbc = [True, True, True]
 
         _ = a.get_cell_volume()
-        _ = a.is_alloy()
-        _ = a.has_vacancies()
+        _ = a.is_alloy
+        _ = a.has_vacancies
 
         b = a.clone()
         # I check that clone returned an unstored copy and so can be altered

diff --git a/aiida/orm/data/array/trajectory.py b/aiida/orm/data/array/trajectory.py
@@ -403,7 +403,7 @@ def _prepare_xsf(self, index=None, main_file_name=""):  # pylint: disable=unused
         return_string = "ANIMSTEPS {}\nCRYSTAL\n".format(len(indices))
         # Do the checks once and for all here:
         structure = self.get_step_structure(index=0)
-        if structure.is_alloy() or structure.has_vacancies():
+        if structure.is_alloy or structure.has_vacancies:
             raise NotImplementedError("XSF for alloys or systems with vacancies not implemented.")
         cells = self.get_cells()
         positions = self.get_positions()
@@ -413,11 +413,6 @@ def _prepare_xsf(self, index=None, main_file_name=""):  # pylint: disable=unused
 
         for idx in indices:
             return_string += "PRIMVEC {}\n".format(idx + 1)
-            #~ structure = self.get_step_structure(index=idx)
-            #~ sites = structure.sites
-            #~ if structure.is_alloy() or structure.has_vacancies():
-            #~ raise NotImplementedError("XSF for alloys or systems with "
-            #~ "vacancies not implemented.")
             for cell_vector in cells[idx]:
                 return_string += ' '.join(["{:18.5f}".format(i) for i in cell_vector])
                 return_string += "\n"

diff --git a/aiida/orm/data/cif.py b/aiida/orm/data/cif.py
@@ -414,7 +414,7 @@ def parse_formula(formula):
     return contents
 
 
-# pylint: disable=abstract-method
+# pylint: disable=abstract-method,too-many-public-methods
 # Note:  Method 'query' is abstract in class 'Node' but is not overridden
 class CifData(SinglefileData):
     """
@@ -725,26 +725,32 @@ def get_spacegroup_numbers(self):
     @property
     def has_partial_occupancies(self):
         """
-        Check if there are float values in the atomic occupancies
+        Return if the cif data contains partial occupancies
 
-        :returns: True if there are partial occupancies, False otherwise
+        A partial occupancy is defined as site with an occupancy that differs from unity, within a precision of 1E-6
+
+        .. note: occupancies that cannot be parsed into a float are ignored
+
+        :return: True if there are partial occupancies, False otherwise
         """
-        epsilon = 1e-6
+        import re
+
         tag = '_atom_site_occupancy'
+
+        epsilon = 1e-6
         partial_occupancies = False
+
         for datablock in self.values.keys():
             if tag in self.values[datablock].keys():
-                for site in self.values[datablock][tag]:
-                    # find the float number in the string
-                    bracket = site.find('(')
-                    if bracket == -1:
-                        # no bracket found
-                        if abs(float(site) - 1) > epsilon:
-                            partial_occupancies = True
+                for position in self.values[datablock][tag]:
+                    try:
+                        # First remove any parentheses to support value like 1.134(56) and then cast to float
+                        occupancy = float(re.sub(r'[\(\)]', '', position))
+                    except ValueError:
+                        pass
                     else:
-                        # bracket, cut string
-                        if abs(float(site[0:bracket]) - 1) > epsilon:
-                            partial_occupancies = True
+                        if abs(occupancy - 1) > epsilon:
+                            return True
 
         return partial_occupancies
 
@@ -765,6 +771,46 @@ def has_attached_hydrogens(self):
 
         return False
 
+    @property
+    def has_undefined_atomic_sites(self):
+        """
+        Return whether the cif data contains any undefined atomic sites.
+
+        An undefined atomic site is defined as a site where at least one of the fractional coordinates specified in the
+        `_atom_site_fract_*` tags, cannot be successfully interpreted as a float. If the cif data contains any site that
+        matches this description, or it does not contain any atomic site tags at all, the cif data is said to have
+        undefined atomic sites.
+
+        :return: boolean, True if no atomic sites are defined or if any of the defined sites contain undefined positions
+            and False otherwise
+        """
+        import re
+
+        tag_x = '_atom_site_fract_x'
+        tag_y = '_atom_site_fract_y'
+        tag_z = '_atom_site_fract_z'
+
+        # Some CifData files do not even contain a single `_atom_site_fract_*` tag
+        has_tags = False
+
+        for datablock in self.values.keys():
+            for tag in [tag_x, tag_y, tag_z]:
+                if tag in self.values[datablock].keys():
+                    for position in self.values[datablock][tag]:
+
+                        # The CifData contains at least one `_atom_site_fract_*` tag
+                        has_tags = True
+
+                        try:
+                            # First remove any parentheses to support value like 1.134(56) and then cast to float
+                            float(re.sub(r'[\(\)]', '', position))
+                        except ValueError:
+                            # Position cannot be converted to a float value, so we have undefined atomic sites
+                            return True
+
+        # At this point the file either has no tags at all, or it does and all coordinates were valid floats
+        return not has_tags
+
     @property
     def has_atomic_sites(self):
         """
@@ -792,16 +838,18 @@ def has_atomic_sites(self):
     def has_unknown_species(self):
         """
         Returns whether the cif contains atomic species that are not recognized by AiiDA.
-        The known species are taken from the elements dictionary in aiida.common.constants.
-        If any of the formula of the cif data contain species that are not in that elements
-        dictionary, the function will return True and False in all other cases. If there is
-        no formulae to be found, it will return None
+
+        The known species are taken from the elements dictionary in `aiida.common.constants`, with the exception of
+        the "unknown" placeholder element with symbol 'X', as this could not be used to construct a real structure.
+        If any of the formula of the cif data contain species that are not in that elements dictionary, the function
+        will return True and False in all other cases. If there is no formulae to be found, it will return None
 
         :returns: True when there are unknown species in any of the formulae, False if not, None if no formula found
         """
         from aiida.common.constants import elements
 
-        known_species = [element['symbol'] for element in elements.values()]
+        # Get all the elements known by AiiDA, excluding the "unknown" element with symbol 'X'
+        known_species = [element['symbol'] for element in elements.values() if element['symbol'] != 'X']
 
         for formula in self.get_formulae():