From 1578227ae4c84b33ef5ba79594d10644368f11d9 Mon Sep 17 00:00:00 2001 From: Sebastiaan Huber Date: Sun, 30 Jun 2019 22:07:07 +0200 Subject: [PATCH] Fix potential inefficiency in `aiida.tools.data.cif` converters (#3098) Recently, the `BackendNode` interface and implementation was changed significantly with respect to attributes and extras. As a result, the values on unstored nodes are not cleaned *until* the node is stored. That means that unstored nodes can contain attributes and extras with uncleaned values. For example an unstored `Dict` node can contained stored `Float` nodes. This change becomes problematic in the implementation of `_get_aiida_structure_pymatgen_inline` and `_get_aiida_structure_ase_inline` of `aiida.tools.data.cif`. The `parameters` keyword argument can be an unstored `Dict` node, if the functions are run with `store_provenance=False`. Since it is unstored, it can contain *stored* nodes for the values within them. For example, the `site_tolerance` can be a stored `Float`. Since they are overloaded native float objects, they can be passed to pymatgens `CifParser` without it complaining. However, now whenever that class references the value, since it is a stored node, the `ModelWrapper` will cause the model instance to be refreshed from the database, which becomes prohibitively expensive. Simply calling `clean_value` on the `parameters` beforehand, will cause these nodes to be dereferenced into their base values, which solves the issue. --- aiida/tools/data/cif.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/aiida/tools/data/cif.py b/aiida/tools/data/cif.py index 8df3c60547..a3b2e2bc78 100644 --- a/aiida/tools/data/cif.py +++ b/aiida/tools/data/cif.py @@ -7,15 +7,17 @@ # For further information on the license, see the LICENSE.txt file # # For further information please visit http://www.aiida.net # ########################################################################### -"""Tools to operate on `CifData` nodes.""" # pylint: disable=invalid-name +"""Tools to operate on `CifData` nodes.""" from __future__ import division from __future__ import print_function from __future__ import absolute_import + from six.moves import range -from aiida.orm import CifData from aiida.engine import calcfunction +from aiida.orm import CifData +from aiida.orm.utils.node import clean_value class InvalidOccupationsError(Exception): @@ -89,7 +91,9 @@ def _get_aiida_structure_ase_inline(cif, **kwargs): parameters = kwargs.get('parameters', {}) if isinstance(parameters, Dict): - parameters = parameters.get_dict() + # Note, if `parameters` is unstored, it might contain stored `Node` instances which might slow down the parsing + # enormously, because each time their value is used, a database call is made to refresh the value + parameters = clean_value(parameters.get_dict()) parameters.pop('occupancy_tolerance', None) parameters.pop('site_tolerance', None) @@ -115,7 +119,9 @@ def _get_aiida_structure_pymatgen_inline(cif, **kwargs): parameters = kwargs.get('parameters', {}) if isinstance(parameters, Dict): - parameters = parameters.get_dict() + # Note, if `parameters` is unstored, it might contain stored `Node` instances which might slow down the parsing + # enormously, because each time their value is used, a database call is made to refresh the value + parameters = clean_value(parameters.get_dict()) constructor_kwargs = {}