Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: allow StructureData without specified cell #5341

Merged
merged 18 commits into from
Apr 11, 2022
Merged
4 changes: 3 additions & 1 deletion aiida/cmdline/commands/cmd_data/cmd_structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ def import_aiida_xyz(filename, vacuum_factor, vacuum_addition, pbc, label, group
Import structure in XYZ format using AiiDA's internal importer
"""
from aiida.orm import StructureData
from aiida.orm.nodes.data.structure import _adjust_default_cell

with open(filename, encoding='utf8') as fobj:
xyz_txt = fobj.read()
Expand All @@ -206,7 +207,8 @@ def import_aiida_xyz(filename, vacuum_factor, vacuum_addition, pbc, label, group

try:
new_structure._parse_xyz(xyz_txt) # pylint: disable=protected-access
new_structure._adjust_default_cell( # pylint: disable=protected-access
new_structure = _adjust_default_cell( # pylint: disable=protected-access
ltalirz marked this conversation as resolved.
Show resolved Hide resolved
new_structure,
vacuum_addition=vacuum_addition,
vacuum_factor=vacuum_factor,
pbc=pbc_bools)
Expand Down
131 changes: 79 additions & 52 deletions aiida/orm/nodes/data/structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
_SUM_THRESHOLD = 1.e-6
# Threshold used to check if the cell volume is not zero.
_VOLUME_THRESHOLD = 1.e-6
# Default cell
_DEFAULT_CELL = None

_valid_symbols = tuple(i['symbol'] for i in elements.values())
_atomic_masses = {el['symbol']: el['mass'] for el in elements.values()}
Expand All @@ -42,6 +44,8 @@ def _get_valid_cell(inputcell):

:raise ValueError: whenever the format is not valid.
"""
if inputcell is _DEFAULT_CELL:
return _DEFAULT_CELL
try:
the_cell = list(list(float(c) for c in i) for i in inputcell)
if len(the_cell) != 3:
Expand Down Expand Up @@ -721,7 +725,7 @@ class StructureData(Data):

def __init__(
self,
cell=None,
cell=_DEFAULT_CELL,
pbc=None,
ase=None,
pymatgen=None,
Expand Down Expand Up @@ -759,44 +763,53 @@ def __init__(
self.set_pymatgen_molecule(pymatgen_molecule)

else:
if cell is None:
cell = [[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]]

if pbc is None:
# If a cell is provided by the user, we default to pbc
# If not
ltalirz marked this conversation as resolved.
Show resolved Hide resolved
pbc = [True, True, True]

self.set_cell(cell)
self.set_pbc(pbc)

def get_dimensionality(self):
"""
This function checks the dimensionality of the structure and
calculates its length/surface/volume
:return: returns the dimensionality and length/surface/volume
calculates its length/surface/volume.

If no cell is specified, the length/surface/volume is 0.

:return: returns a dictionary with keys "dim" (dimensionality integer), "label" (dimensionality label)
and "value" (numerical length/surface/volume).
"""

import numpy as np

retdict = {}

cell = np.array(self.cell)
pbc = np.array(self.pbc)
dim = len(pbc[pbc])

retdict['dim'] = dim
retdict['label'] = self._dimensionality_label[dim]

if dim not in (0, 1, 2, 3):
raise ValueError(f'Dimensionality {dim} must be one of 0, 1, 2, 3')

if self.cell == _DEFAULT_CELL:
ltalirz marked this conversation as resolved.
Show resolved Hide resolved
# If no cell was specified, no volume can be computed
retdict['value'] = 0
return retdict

cell = np.array(self.cell)
if dim == 0:
pass
# We have no concept of 0d volume. Let's return a value of 0 for a consistent output dictionary
retdict['value'] = 0
elif dim == 1:
retdict['value'] = np.linalg.norm(cell[pbc])
elif dim == 2:
vectors = cell[pbc]
retdict['value'] = np.linalg.norm(np.cross(vectors[0], vectors[1]))
elif dim == 3:
retdict['value'] = np.dot(cell[0], np.cross(cell[1], cell[2]))
else:
raise ValueError(f'Dimensionality {dim} must be <= 3')

return retdict

Expand All @@ -806,7 +819,10 @@ def set_ase(self, aseatoms):
"""
if is_ase_atoms(aseatoms):
# Read the ase structure
self.cell = aseatoms.cell
if calc_cell_volume(aseatoms.cell) == 0.0: # ASE defaults to cell [0.,0.,0.]
ltalirz marked this conversation as resolved.
Show resolved Hide resolved
self.cell = _DEFAULT_CELL
else:
self.cell = aseatoms.cell
self.pbc = aseatoms.pbc
self.clear_kinds() # This also calls clear_sites
for atom in aseatoms:
Expand Down Expand Up @@ -1123,46 +1139,6 @@ def _parse_xyz(self, inputstring):
for sym, position in atoms:
self.append_atom(symbols=sym, position=position)

def _adjust_default_cell(self, vacuum_factor=1.0, vacuum_addition=10.0, pbc=(False, False, False)):
"""
If the structure was imported from an xyz file, it lacks a defined cell,
and the default cell is taken ([[1,0,0], [0,1,0], [0,0,1]]),
leading to an unphysical definition of the structure.
This method will adjust the cell
"""
# pylint: disable=invalid-name
import numpy as np

def get_extremas_from_positions(positions):
"""
returns the minimum and maximum value for each dimension in the positions given
"""
return list(zip(*[(min(values), max(values)) for values in zip(*positions)]))

# First, set PBC
# All the checks are done in get_valid_pbc called by set_pbc, no need to check anything here
self.set_pbc(pbc)

# Calculating the minimal cell:
positions = np.array([site.position for site in self.sites])
position_min, _ = get_extremas_from_positions(positions)

# Translate the structure to the origin, such that the minimal values in each dimension
# amount to (0,0,0)
positions -= position_min
for index, site in enumerate(self.get_attribute('sites')):
site['position'] = list(positions[index])

# The orthorhombic cell that (just) accomodates the whole structure is now given by the
# extremas of position in each dimension:
minimal_orthorhombic_cell_dimensions = np.array(get_extremas_from_positions(positions)[1])
minimal_orthorhombic_cell_dimensions = np.dot(vacuum_factor, minimal_orthorhombic_cell_dimensions)
minimal_orthorhombic_cell_dimensions += vacuum_addition

# Transform the vector (a, b, c ) to [[a,0,0], [0,b,0], [0,0,c]]
newcell = np.diag(minimal_orthorhombic_cell_dimensions)
self.set_cell(newcell.tolist())

def get_description(self):
"""
Returns a string with infos retrieved from StructureData node's properties
Expand Down Expand Up @@ -1686,6 +1662,11 @@ def set_pbc(self, value):
raise ModificationNotAllowed('The StructureData object cannot be modified, it has already been stored')
the_pbc = get_valid_pbc(value)

# In order to introduce this consistency check, we would need to change the default value of `pbc`
ltalirz marked this conversation as resolved.
Show resolved Hide resolved
# (at least for cases where the user does not provide a cell).
# if any(the_pbc):
# assert self.cell is not None, 'Must provide cell when specifying periodic boundary conditions.'

# self._pbc = the_pbc
self.set_attribute('pbc1', the_pbc[0])
self.set_attribute('pbc2', the_pbc[1])
Expand Down Expand Up @@ -1758,6 +1739,8 @@ def get_cell_volume(self):

:return: a float.
"""
if self.cell is _DEFAULT_CELL:
return 0
return calc_cell_volume(self.cell)

def get_cif(self, converter='ase', store=False, **kwargs):
Expand Down Expand Up @@ -1861,6 +1844,8 @@ def _get_object_pymatgen_structure(self, **kwargs):

if self.pbc != (True, True, True):
raise ValueError('Periodic boundary conditions must apply in all three dimensions of real space')
if self.cell == _DEFAULT_CELL:
ltalirz marked this conversation as resolved.
Show resolved Hide resolved
raise ValueError('Must specify cell in order to create pymatgen structure')
ltalirz marked this conversation as resolved.
Show resolved Hide resolved

species = []
additional_kwargs = {}
Expand Down Expand Up @@ -2493,3 +2478,45 @@ def __str__(self):

# get_structuredata_from_qeinput has been moved to:
# aiida.tools.codespecific.quantumespresso.qeinputparser


def _adjust_default_cell(structure, vacuum_factor=1.0, vacuum_addition=10.0, pbc=(False, False, False)):
"""
If the structure was imported from an xyz file, it lacks a cell.
This method will adjust the cell

Note: This helper method is not used by the StructureData class directly.
"""
# pylint: disable=invalid-name
import numpy as np

def get_extremas_from_positions(positions):
"""
returns the minimum and maximum value for each dimension in the positions given
"""
return list(zip(*[(min(values), max(values)) for values in zip(*positions)]))

# Calculating the minimal cell:
positions = np.array([site.position for site in structure.sites])
position_min, _ = get_extremas_from_positions(positions)

# Translate the structure to the origin, such that the minimal values in each dimension
# amount to (0,0,0)
positions -= position_min
for index, site in enumerate(structure.get_attribute('sites')):
site['position'] = list(positions[index])

# The orthorhombic cell that (just) accomodates the whole structure is now given by the
# extremas of position in each dimension:
minimal_orthorhombic_cell_dimensions = np.array(get_extremas_from_positions(positions)[1])
minimal_orthorhombic_cell_dimensions = np.dot(vacuum_factor, minimal_orthorhombic_cell_dimensions)
minimal_orthorhombic_cell_dimensions += vacuum_addition

# Transform the vector (a, b, c ) to [[a,0,0], [0,b,0], [0,0,c]]
newcell = np.diag(minimal_orthorhombic_cell_dimensions)
structure.set_cell(newcell.tolist())

# Now set PBC (checks are done in set_pbc, no need to check anything here)
structure.set_pbc(pbc)

return structure
31 changes: 27 additions & 4 deletions tests/test_dataclasses.py
Original file line number Diff line number Diff line change
Expand Up @@ -1711,7 +1711,6 @@ def test_get_cif(self):

def test_xyz_parser(self):
"""Test XYZ parser."""
import numpy as np
xyz_string1 = """
3

Expand Down Expand Up @@ -1743,9 +1742,7 @@ def test_xyz_parser(self):
# Making sure that the structure has sites, kinds and a cell
self.assertTrue(s.sites)
self.assertTrue(s.kinds)
self.assertTrue(s.cell)
# The default cell is given in these cases:
self.assertEqual(s.cell, np.diag([1, 1, 1]).tolist())
self.assertIsNone(s.cell)

# Testing a case where 1
xyz_string4 = """
Expand Down Expand Up @@ -1949,6 +1946,28 @@ def test_ase(self):

self.assertAlmostEqual(c[1].mass, 110.2)

@unittest.skipIf(not has_ase(), 'Unable to import ase')
def test_ase_molecule(self): # pylint: disable=no-self-use
"""Tests that importing a molecule from ASE works."""
from ase.build import molecule
s = StructureData(ase=molecule('H2O'))

assert s.cell is None
assert s.pbc == (False, False, False)
retdict = s.get_dimensionality()
assert retdict['value'] == 0
assert retdict['dim'] == 0

# Enabling this consistency check would require us to change the default value
# of pbc to [False, False, False]
# with self.assertRaises(AssertionError):
# Enabling pbc on a structure with no cell should raise
# s.set_pbc(True)

# after setting a cell, we should be able to enable pbc
s.set_cell([[5, 0, 0], [0, 5, 0], [0, 0, 5]])
s.set_pbc(True)

@unittest.skipIf(not has_ase(), 'Unable to import ase')
def test_conversion_of_types_1(self):
"""
Expand Down Expand Up @@ -2307,7 +2326,11 @@ class TestPymatgenFromStructureData(AiidaTestCase):
def test_1(self):
"""Tests the check of periodic boundary conditions."""
struct = StructureData()
with self.assertRaises(ValueError):
# cell needed for pymatgen structure
struct.get_pymatgen_structure()

struct.set_cell([[1, 0, 0], [0, 1, 2], [3, 4, 5]])
struct.pbc = [True, True, True]
struct.get_pymatgen_structure()

Expand Down