From ce539d8b97a5059e2a9306b9cfc99737c9f29da4 Mon Sep 17 00:00:00 2001 From: Leonid Kahle Date: Thu, 1 Jun 2017 18:00:20 +0200 Subject: [PATCH 001/101] #119 added first small test to check for node hashing --- aiida/backends/tests/nodes.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/aiida/backends/tests/nodes.py b/aiida/backends/tests/nodes.py index 7840e8fa5f..651f756ef6 100644 --- a/aiida/backends/tests/nodes.py +++ b/aiida/backends/tests/nodes.py @@ -20,6 +20,35 @@ from aiida.orm.utils import load_node +class TestNodeHashing(AiidaTestCase): + """ + Tests the functionality of hashing a node + """ + def test_hashing_of_node_1(self): + + n1 = Node() + n1._set_attr('a',1.0) + n1._set_attr('b',1.1) + n1._set_attr('c',1.2) + n1.store() + + + n2 = Node() + n2._set_attr('a',1.0) + n2._set_attr('b',1.1) + n2._set_attr('c',1.2) + n2.store(find_same=True) + + self.assertEqual(n1.uuid, n2.uuid) + self.assertEqual(n1.folder.get_abs_path('.'), n2.folder.get_abs_path('.')) + + + n3 = Node() + n3._set_attr('a',2.0) + n3._set_attr('b',1.1) + n3._set_attr('c',1.2) + n3.store(find_same=True) + self.assertNotEquals(n2.uuid, n3.uuid) class TestDataNode(AiidaTestCase): """ From 7589afdbb535a814e2a10b3c8bb916ae2d85e091 Mon Sep 17 00:00:00 2001 From: Leonid Kahle Date: Thu, 1 Jun 2017 18:01:11 +0200 Subject: [PATCH 002/101] #119 Added small functions get_hash and get_same_node to Node --- aiida/orm/implementation/general/node.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/aiida/orm/implementation/general/node.py b/aiida/orm/implementation/general/node.py index d82808fd51..90dce5c39c 100644 --- a/aiida/orm/implementation/general/node.py +++ b/aiida/orm/implementation/general/node.py @@ -1092,6 +1092,8 @@ def iterextras(self): return yield # Needed after return to convert it to a generator for _ in self._db_iterextras(): + if _[0] == 'hash': + continue yield _ @@ -1471,6 +1473,7 @@ def store(self, with_transaction=True): # for storing data and its attributes. 
pass + def __del__(self): """ Called only upon real object destruction from memory @@ -1480,6 +1483,26 @@ def __del__(self): if getattr(self, '_temp_folder', None) is not None: self._temp_folder.erase() + + def get_hash(self): + """ + Making a hash based on my attributes + """ + from aiida.common.hashing import make_hash + return make_hash(self.get_attrs()) + + + def get_same_node(self): + from aiida.orm.querybuilder import QueryBuilder + + qb = QueryBuilder() + qb.append(self.__class__, filters={'extras.hash':self.get_hash()}, project='*', subclassing=False) + same_node = qb.first() + if same_node: + return same_node[0] + else: + return None + @property def out(self): """ From 13caff274d9965613dd0b374c87bf3a6463a125e Mon Sep 17 00:00:00 2001 From: Leonid Kahle Date: Thu, 1 Jun 2017 18:02:33 +0200 Subject: [PATCH 003/101] #119 Added functionality to store method (only for Django backend) that replaces the _dbnode member if a similar node already exists --- aiida/orm/implementation/django/node.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/aiida/orm/implementation/django/node.py b/aiida/orm/implementation/django/node.py index 0f7563b9d5..cd64c62b96 100644 --- a/aiida/orm/implementation/django/node.py +++ b/aiida/orm/implementation/django/node.py @@ -557,7 +557,7 @@ def _store_cached_input_links(self, with_transaction=True): # would have been raised, and the following lines are not executed) self._inputlinks_cache.clear() - def store(self, with_transaction=True): + def store(self, with_transaction=True, find_same=False): """ Store a new node in the DB, also saving its repository directory and attributes. @@ -573,6 +573,8 @@ def store(self, with_transaction=True): :parameter with_transaction: if False, no transaction is used. This is meant to be used ONLY if the outer calling function has already a transaction open! + + :param bool find_same: Whether I attempt to find an equal node in the DB. """ # TODO: This needs to be generalized, allowing for flexible methods # for storing data and its attributes. @@ -587,6 +589,7 @@ def store(self, with_transaction=True): else: context_man = EmptyContextManager() + if self._to_be_stored: # As a first thing, I check if the data is valid @@ -603,6 +606,18 @@ def store(self, with_transaction=True): # I assume that if a node exists in the DB, its folder is in place. # On the other hand, periodically the user might need to run some # bookkeeping utility to check for lone folders. 
+ + + # For node hashing, if find_same is true: + if find_same: + same_node = self.get_same_node() + if same_node is not None: + #~ pass + self._dbnode = same_node.dbnode + self._to_be_stored = False + self._repo_folder = same_node._repo_folder + return self + self._repository_folder.replace_with_folder( self._get_temp_folder().abspath, move=True, overwrite=True) @@ -653,6 +668,9 @@ def store(self, with_transaction=True): # This is useful because in this way I can do # n = Node().store() + from aiida.backends.djsite.db.models import DbExtra + # I store the hash without cleaning and without incrementing the nodeversion number + DbExtra.set_value_for_node(self.dbnode, 'hash', self.get_hash()) return self @property From ca5f0477021a27fe98e7ec503fff05fd9abec222 Mon Sep 17 00:00:00 2001 From: Leonid Kahle Date: Fri, 2 Jun 2017 06:40:18 +0200 Subject: [PATCH 004/101] #119 Try-except clause if hashing fails, and also check for None for hash, which obviously should not be checked in the DB --- aiida/orm/implementation/general/node.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/aiida/orm/implementation/general/node.py b/aiida/orm/implementation/general/node.py index 90dce5c39c..70ecc7d364 100644 --- a/aiida/orm/implementation/general/node.py +++ b/aiida/orm/implementation/general/node.py @@ -1489,15 +1489,23 @@ def get_hash(self): Making a hash based on my attributes """ from aiida.common.hashing import make_hash - return make_hash(self.get_attrs()) + try: + return make_hash(self.get_attrs()) + except: + return None def get_same_node(self): from aiida.orm.querybuilder import QueryBuilder - qb = QueryBuilder() - qb.append(self.__class__, filters={'extras.hash':self.get_hash()}, project='*', subclassing=False) - same_node = qb.first() + hash_ = self.get_hash() + if hash_: + qb = QueryBuilder() + qb.append(self.__class__, filters={'extras.hash':hash_}, project='*', subclassing=False) + same_node = qb.first() + else: + same_node = None + if same_node: return same_node[0] else: From 87e546fc8fbd3856464db3f02deb5c8453b3f1f3 Mon Sep 17 00:00:00 2001 From: Leonid Kahle Date: Fri, 2 Jun 2017 08:22:54 +0200 Subject: [PATCH 005/101] #119 Fix in test that was failing because there is now an additional extra (hash) --- aiida/backends/djsite/db/subtests/generic.py | 12 ++++++------ aiida/orm/implementation/general/node.py | 1 + 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/aiida/backends/djsite/db/subtests/generic.py b/aiida/backends/djsite/db/subtests/generic.py index e9cab03d09..18fcebe550 100644 --- a/aiida/backends/djsite/db/subtests/generic.py +++ b/aiida/backends/djsite/db/subtests/generic.py @@ -133,18 +133,18 @@ def test_replacement_1(self): DbExtra.set_value_for_node(n1.dbnode, "pippobis", [5, 6, 'c']) DbExtra.set_value_for_node(n2.dbnode, "pippo2", [3, 4, 'b']) - self.assertEquals(n1.dbnode.extras, {'pippo': [1, 2, 'a'], + self.assertEquals(n1.get_extras(), {'pippo': [1, 2, 'a'], 'pippobis': [5, 6, 'c']}) - self.assertEquals(n2.dbnode.extras, {'pippo2': [3, 4, 'b']}) + self.assertEquals(n2.get_extras(), {'pippo2': [3, 4, 'b']}) new_attrs = {"newval1": "v", "newval2": [1, {"c": "d", "e": 2}]} DbExtra.reset_values_for_node(n1.dbnode, attributes=new_attrs) - self.assertEquals(n1.dbnode.extras, new_attrs) - self.assertEquals(n2.dbnode.extras, {'pippo2': [3, 4, 'b']}) + self.assertEquals(n1.get_extras(), new_attrs) + self.assertEquals(n2.get_extras(), {'pippo2': [3, 4, 'b']}) DbExtra.del_value_for_node(n1.dbnode, key='newval2') del new_attrs['newval2'] 
- self.assertEquals(n1.dbnode.extras, new_attrs) + self.assertEquals(n1.get_extras(), new_attrs) # Also check that other nodes were not damaged - self.assertEquals(n2.dbnode.extras, {'pippo2': [3, 4, 'b']}) + self.assertEquals(n2.get_extras(), {'pippo2': [3, 4, 'b']}) diff --git a/aiida/orm/implementation/general/node.py b/aiida/orm/implementation/general/node.py index 70ecc7d364..dd06a34b15 100644 --- a/aiida/orm/implementation/general/node.py +++ b/aiida/orm/implementation/general/node.py @@ -1092,6 +1092,7 @@ def iterextras(self): return yield # Needed after return to convert it to a generator for _ in self._db_iterextras(): + # Don't return if key == hash if _[0] == 'hash': continue yield _ From 39d378e3bb1cf1487237079a448a01c1540cf7b5 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Thu, 15 Jun 2017 15:37:57 +0200 Subject: [PATCH 006/101] * Failing test for two small but unequal floats * Failing test for two ArrayData with unequal content * (Accidentally) passing test for two ArrayData of different size, with same str representation For the ArrayData, we need to take the actual array into account when creating the hash, not just the shape which is return in get_attrs() --- aiida/backends/tests/nodes.py | 89 +++++++++++++++++++++++------------ 1 file changed, 58 insertions(+), 31 deletions(-) diff --git a/aiida/backends/tests/nodes.py b/aiida/backends/tests/nodes.py index 651f756ef6..e8dd55d7fe 100644 --- a/aiida/backends/tests/nodes.py +++ b/aiida/backends/tests/nodes.py @@ -12,10 +12,13 @@ """ import unittest +import numpy as np + from aiida.backends.testbase import AiidaTestCase from aiida.common.exceptions import ModificationNotAllowed, UniquenessError from aiida.common.links import LinkType from aiida.orm.data import Data +from aiida.orm.data.array import ArrayData from aiida.orm.node import Node from aiida.orm.utils import load_node @@ -24,31 +27,55 @@ class TestNodeHashing(AiidaTestCase): """ Tests the functionality of hashing a node """ - def test_hashing_of_node_1(self): - - n1 = Node() - n1._set_attr('a',1.0) - n1._set_attr('b',1.1) - n1._set_attr('c',1.2) - n1.store() - - - n2 = Node() - n2._set_attr('a',1.0) - n2._set_attr('b',1.1) - n2._set_attr('c',1.2) - n2.store(find_same=True) - - self.assertEqual(n1.uuid, n2.uuid) - self.assertEqual(n1.folder.get_abs_path('.'), n2.folder.get_abs_path('.')) - - - n3 = Node() - n3._set_attr('a',2.0) - n3._set_attr('b',1.1) - n3._set_attr('c',1.2) - n3.store(find_same=True) - self.assertNotEquals(n2.uuid, n3.uuid) + @staticmethod + def create_simple_node(a, b=0, c=0): + n = Node() + n._set_attr('a', a) + n._set_attr('b', b) + n._set_attr('c', c) + return n + + def test_simple_equal_nodes(self): + attributes = [ + (1.0, 1.1, 1.2), + ({'a': 'b', 'c': 'd'}, [1, 2, 3], {4, 1, 2}) + ] + for attr in attributes: + n1 = self.create_simple_node(*attr) + n2 = self.create_simple_node(*attr) + n1.store() + n2.store(find_same=True) + self.assertEqual(n1.uuid, n2.uuid) + self.assertEqual(n1.folder.get_abs_path('.'), n2.folder.get_abs_path('.')) + + def test_simple_unequal_nodes(self): + attributes = [ + [(1.0, 1.1, 1.2), (2.0, 1.1, 1.2)], + [(1e-14,), (2e-14,)], + ] + for attr1, attr2 in attributes: + n1 = self.create_simple_node(*attr1) + n2 = self.create_simple_node(*attr2) + n1.store() + n2.store(find_same=True) + self.assertNotEquals(n1.uuid, n2.uuid) + + def test_unequal_arrays(self): + arrays = [ + (np.zeros(1001), np.zeros(1005)), + (np.array([1, 2, 3]), np.array([2, 3, 4])) + ] + def create_arraydata(arr): + a = ArrayData() + 
a.set_array('a', arr) + return a + + for arr1, arr2 in arrays: + a1 = create_arraydata(arr1) + a1.store() + a2 = create_arraydata(arr2) + a2.store(find_same=True) + self.assertNotEquals(a1.uuid, a2.uuid) class TestDataNode(AiidaTestCase): """ @@ -796,7 +823,7 @@ def test_get_extras_with_default(self): a = Node() a.store() a.set_extra('a', 'b') - + self.assertEquals(a.get_extra('a'), 'b') with self.assertRaises(AttributeError): a.get_extra('c') @@ -1025,7 +1052,7 @@ def test_basetype_as_attr(self): self.assertIsInstance(p.get_attr('b'),basestring) self.assertEqual(p.get_attr('c'), ['b', [1,2]]) self.assertIsInstance(p.get_attr('c'), (list, tuple)) - + # Check also before storing n = Node() n._set_attr('a', Str("sometext2")) @@ -1034,15 +1061,15 @@ def test_basetype_as_attr(self): self.assertIsInstance(n.get_attr('a'),basestring) self.assertEqual(n.get_attr('b'), ['f', True, {'gg': None}]) self.assertIsInstance(n.get_attr('b'), (list, tuple)) - + # Check also deep in a dictionary/list n = Node() n._set_attr('a', {'b': [Str("sometext3")]}) self.assertEqual(n.get_attr('a')['b'][0], "sometext3") - self.assertIsInstance(n.get_attr('a')['b'][0],basestring) + self.assertIsInstance(n.get_attr('a')['b'][0],basestring) n.store() self.assertEqual(n.get_attr('a')['b'][0], "sometext3") - self.assertIsInstance(n.get_attr('a')['b'][0],basestring) + self.assertIsInstance(n.get_attr('a')['b'][0],basestring) def test_basetype_as_extra(self): """ @@ -1077,7 +1104,7 @@ def test_basetype_as_extra(self): n.store() n.set_extra('a', {'b': [Str("sometext3")]}) self.assertEqual(n.get_extra('a')['b'][0], "sometext3") - self.assertIsInstance(n.get_extra('a')['b'][0],basestring) + self.assertIsInstance(n.get_extra('a')['b'][0],basestring) def test_versioning_lowlevel(self): """ From 219d53ba0b826de0e6438492e908e3fe2466fe93 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Thu, 20 Jul 2017 11:06:05 +0200 Subject: [PATCH 007/101] Give name to '_' in loop --- aiida/orm/implementation/general/node.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/aiida/orm/implementation/general/node.py b/aiida/orm/implementation/general/node.py index ab88d18e9e..34193e6963 100644 --- a/aiida/orm/implementation/general/node.py +++ b/aiida/orm/implementation/general/node.py @@ -142,11 +142,11 @@ def __new__(cls, name, bases, attrs): def get_desc(self): """ - Returns a string with infos retrieved from a node's + Returns a string with infos retrieved from a node's properties. 
This method is actually overwritten by the inheriting classes - - :return: a description string + + :return: a description string """ return "" @@ -1103,11 +1103,11 @@ def iterextras(self): # Return without value, meaning that this is an empty generator return yield # Needed after return to convert it to a generator - for _ in self._db_iterextras(): + for extra in self._db_iterextras(): # Don't return if key == hash - if _[0] == 'hash': + if extra[0] == 'hash': continue - yield _ + yield extra def iterattrs(self): From 76acbb6e17d8dce555cba1820502fe6f5693b648 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Thu, 20 Jul 2017 12:16:53 +0200 Subject: [PATCH 008/101] Update hashing algorithm --- aiida/common/hashing.py | 168 +++++++++++++++-------- aiida/orm/implementation/general/node.py | 2 + 2 files changed, 110 insertions(+), 60 deletions(-) diff --git a/aiida/common/hashing.py b/aiida/common/hashing.py index dfc0ccd489..742f609bb1 100644 --- a/aiida/common/hashing.py +++ b/aiida/common/hashing.py @@ -12,6 +12,15 @@ import hashlib import time from datetime import datetime +import numbers +try: # Python3 + from functools import singledispatch + from collections import abc +except ImportError: # Python2 + from singledispatch import singledispatch + import collections as abc + +import numpy as np """ Here we define a single password hashing instance for the full AiiDA. @@ -112,14 +121,14 @@ def make_hash_with_type(type_chr, string_to_hash): """ return hashlib.sha224("{}{}".format(type_chr, string_to_hash)).hexdigest() -def make_hash(object_to_hash, float_precision=12): +@singledispatch +def make_hash(object_to_hash): """ Makes a hash from a dictionary, list, tuple or set to any level, that contains only other hashable or nonhashable types (including lists, tuples, sets, and dictionaries). :param object_to_hash: the object to hash - :param int float_precision: the precision when converting floats to strings :returns: a unique hash @@ -181,66 +190,105 @@ def make_hash(object_to_hash, float_precision=12): the string of dictionary do not suffice if we want to check for equality of dictionaries using hashes. 
""" - import numpy as np + raise ValueError("Value of type {} cannot be hashed".format( + type(object_to_hash)) + ) - if isinstance(object_to_hash, (tuple, list)): - hashes = tuple([ - make_hash(_, float_precision=float_precision) - for _ - in object_to_hash - ]) - # We treat lists and tuples as if they are the same thing, - # but I think this is OK - return make_hash_with_type('L', "".join(hashes)) - - elif isinstance(object_to_hash, set): - hashes = tuple([ - make_hash(_, float_precision=float_precision) - for _ - in sorted(object_to_hash) - ]) - return make_hash_with_type('S', "".join(hashes)) +@make_hash.register(abc.Sequence) +def _(object_to_hash): + hashes = tuple([ + make_hash(x) for x in object_to_hash + ]) + return make_hash_with_type('L', ",".join(hashes)) + +@make_hash.register(abc.Set) +def _(object_to_hash): + hashes = tuple([ + make_hash(x) + for x + in sorted(object_to_hash) + ]) + return make_hash_with_type('S', ",".join(hashes)) + +@make_hash.register(abc.Mapping) +def _(object_to_hash): + hashed_dictionary = { + k: make_hash(v) + for k,v + in object_to_hash.items() + } + return make_hash_with_type( + 'D', + make_hash(sorted(hashed_dictionary.items())) + ) - elif isinstance(object_to_hash, dict): - hashed_dictionary = { - k: make_hash(v, float_precision=float_precision) - for k,v - in object_to_hash.items() - } - return make_hash_with_type( - 'D', make_hash(sorted( - hashed_dictionary.items()), float_precision=float_precision - ) - ) +@make_hash.register(numbers.Real) +def _(object_to_hash): + return make_hash_with_type( + 'f', + truncate_float64(object_to_hash).tobytes() + ) + +@make_hash.register(numbers.Complex) +def _(object_to_hash): + return make_hash_with_type( + 'c', + ','.join([ + make_hash(object_to_hash.real), + make_hash(object_to_hash.imag) + ]) + ) - elif isinstance(object_to_hash, float): +@make_hash.register(numbers.Integral) +def _(object_to_hash): + return make_hash_with_type('i', str(object_to_hash)) + +@make_hash.register(basestring) +def _(object_to_hash): + return make_hash_with_type('s', object_to_hash) + +@make_hash.register(bool) +def _(object_to_hash): + return make_hash_with_type('b', str(object_to_hash)) + +@make_hash.register(type(None)) +def _(object_to_hash): + return make_hash_with_type('n', str(object_to_hash)) + +@make_hash.register(datetime) +def _(object_to_hash): + return make_hash_with_type('d', str(object_to_hash)) + +@make_hash.register(np.ndarray) +def _(object_to_hash): + if object_to_hash.dtype == np.float64: + return make_hash_with_type( + 'af', + make_hash(truncate_array64(object_to_hash).tobytes()) + ) + elif object_to_hash.dtype == np.complex128: return make_hash_with_type( - 'f','{:.{precision}f}'.format( - object_to_hash, precision=float_precision - ) - ) - # If is numpy: - elif type(object_to_hash).__module__ == np.__name__: - return make_hash_with_type('N', str(object_to_hash)) - - elif isinstance(object_to_hash, basestring): - return make_hash_with_type('s', object_to_hash) - - elif isinstance(object_to_hash, bool): # bool must come before int - # I prefer to be sure of what I hash instead of using 'str' - return make_hash_with_type('b', "True" if object_to_hash else "False") - - elif object_to_hash is None: - return make_hash_with_type('n', "None") - - elif isinstance(object_to_hash, int): - return make_hash_with_type('i', str(object_to_hash)) - elif isinstance(object_to_hash, long): - return make_hash_with_type('l', str(object_to_hash)) - - elif isinstance(object_to_hash, datetime): - return make_hash_with_type('d', 
str(object_to_hash)) - # Possibly add more types here, as needed + 'ac', + make_hash([ + object_to_hash.real, + object_to_hash.imag + ]) + ) else: - raise ValueError("Value of type {} cannot be hashed".format( - type(object_to_hash))) + return make_hash_with_type( + 'ao', + make_hash(object_to_hash.tobytes()) + ) + +def truncate_float64(x, num_bits=4): + mask = ~(2**num_bits - 1) + int_repr = np.float64(x).view(np.int64) + masked_int = int_repr & mask + truncated_x = masked_int.view(np.float64) + return truncated_x + +def truncate_array64(x, num_bits=4): + mask = ~(2**num_bits - 1) + int_array = np.array(x, dtype=np.float64).view(np.int64) + masked_array = int_array & mask + return masked_array.view(np.float64) diff --git a/aiida/orm/implementation/general/node.py b/aiida/orm/implementation/general/node.py index 34193e6963..d4663c26c9 100644 --- a/aiida/orm/implementation/general/node.py +++ b/aiida/orm/implementation/general/node.py @@ -1502,6 +1502,8 @@ def get_hash(self): Making a hash based on my attributes """ from aiida.common.hashing import make_hash + # TODO: Fix getting the hash when get_attrs doesn't + # produce the "full" content (e.g. for ArrayData) try: return make_hash(self.get_attrs()) except: From 2de9bc82e2a99c70fb9e51cd74a0959dc0a66fc2 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Thu, 20 Jul 2017 12:43:40 +0200 Subject: [PATCH 009/101] Fix get_hash for special case of ArrayData --- aiida/orm/implementation/general/node.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/aiida/orm/implementation/general/node.py b/aiida/orm/implementation/general/node.py index d4663c26c9..2ce0ab00b2 100644 --- a/aiida/orm/implementation/general/node.py +++ b/aiida/orm/implementation/general/node.py @@ -1505,6 +1505,16 @@ def get_hash(self): # TODO: Fix getting the hash when get_attrs doesn't # produce the "full" content (e.g. for ArrayData) try: + if hasattr(self, 'get_array'): + return make_hash([ + self.get_attrs(), + [ + self.get_array(name) for name in + self.get_arraynames() + ] + ]) + else: + return make_hash(self.get_attrs()) return make_hash(self.get_attrs()) except: return None From a4ae949e5fcce8de3e6a798085bfa5667ffacb52 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Thu, 20 Jul 2017 14:59:22 +0200 Subject: [PATCH 010/101] Fix ArrayData issue by hashing folder content for 'pathlib.Path' --- aiida/common/hashing.py | 10 ++++++++++ aiida/orm/implementation/general/node.py | 22 ++++++++-------------- setup_requirements.py | 1 + 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/aiida/common/hashing.py b/aiida/common/hashing.py index 742f609bb1..f8a0d25efd 100644 --- a/aiida/common/hashing.py +++ b/aiida/common/hashing.py @@ -14,13 +14,16 @@ from datetime import datetime import numbers try: # Python3 + import pathlib from functools import singledispatch from collections import abc except ImportError: # Python2 + import pathlib2 as pathlib from singledispatch import singledispatch import collections as abc import numpy as np +import checksumdir """ Here we define a single password hashing instance for the full AiiDA. 
@@ -259,6 +262,13 @@ def _(object_to_hash): def _(object_to_hash): return make_hash_with_type('d', str(object_to_hash)) +@make_hash.register(pathlib.Path) +def _(object_to_hash): + return make_hash_with_type( + 'p', + checksumdir.dirhash(str(object_to_hash)) + ) + @make_hash.register(np.ndarray) def _(object_to_hash): if object_to_hash.dtype == np.float64: diff --git a/aiida/orm/implementation/general/node.py b/aiida/orm/implementation/general/node.py index 2ce0ab00b2..8f485dad39 100644 --- a/aiida/orm/implementation/general/node.py +++ b/aiida/orm/implementation/general/node.py @@ -14,6 +14,10 @@ import logging import os import types +try: + import pathlib +except ImportError: + import pathlib2 as pathlib from aiida.common.exceptions import (InternalError, ModificationNotAllowed, UniquenessError) @@ -1502,24 +1506,14 @@ def get_hash(self): Making a hash based on my attributes """ from aiida.common.hashing import make_hash - # TODO: Fix getting the hash when get_attrs doesn't - # produce the "full" content (e.g. for ArrayData) try: - if hasattr(self, 'get_array'): - return make_hash([ - self.get_attrs(), - [ - self.get_array(name) for name in - self.get_arraynames() - ] - ]) - else: - return make_hash(self.get_attrs()) - return make_hash(self.get_attrs()) + return make_hash([ + self.get_attrs(), + pathlib.Path(self.get_abs_path()) + ]) except: return None - def get_same_node(self): from aiida.orm.querybuilder import QueryBuilder diff --git a/setup_requirements.py b/setup_requirements.py index d0e9bf4436..b9adee2b49 100644 --- a/setup_requirements.py +++ b/setup_requirements.py @@ -22,6 +22,7 @@ 'six==1.10', 'future', 'singledispatch >= 3.4.0.0', + 'checksumdir >= 1.1.4' # We need for the time being to stay with an old version # of celery, including the versions of the AMQP libraries below, # because the support for a SQLA broker has been dropped in later From 516c44e89531d93eea2de55cd8f7f9cae5e48e58 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Thu, 20 Jul 2017 15:05:27 +0200 Subject: [PATCH 011/101] Explicitly raise error when non-directory path is hashed --- aiida/common/hashing.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/aiida/common/hashing.py b/aiida/common/hashing.py index f8a0d25efd..46b5c109f8 100644 --- a/aiida/common/hashing.py +++ b/aiida/common/hashing.py @@ -264,10 +264,13 @@ def _(object_to_hash): @make_hash.register(pathlib.Path) def _(object_to_hash): - return make_hash_with_type( - 'p', - checksumdir.dirhash(str(object_to_hash)) - ) + try: + return make_hash_with_type( + 'pd', + checksumdir.dirhash(str(object_to_hash)) + ) + except TypeError: + raise ValueError('Cannot hash pathlib.Path unless it is a directory.') @make_hash.register(np.ndarray) def _(object_to_hash): From dcb4a38b7c432edfba957c9598fbda6cc7547c7f Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Thu, 20 Jul 2017 15:26:09 +0200 Subject: [PATCH 012/101] Add pathlib2 requirement --- setup_requirements.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup_requirements.py b/setup_requirements.py index b9adee2b49..1461bda0d5 100644 --- a/setup_requirements.py +++ b/setup_requirements.py @@ -21,6 +21,7 @@ 'pytz==2014.10', 'six==1.10', 'future', + 'pathlib2', 'singledispatch >= 3.4.0.0', 'checksumdir >= 1.1.4' # We need for the time being to stay with an old version From bbe4c33ad582e45885679df00e3fdb83612a1257 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Thu, 20 Jul 2017 16:23:25 +0200 Subject: [PATCH 013/101] Print modulename to check failing Travis test 
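The hashing tests fail on Travis before any single test result is reported, which points to a problem while the test modules are being collected rather than inside a test itself. Printing each module name as it is added to the suite should show which module breaks; the module-level numpy and ArrayData imports recently added to aiida/backends/tests/nodes.py are the prime suspects.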
--- aiida/backends/testbase.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/aiida/backends/testbase.py b/aiida/backends/testbase.py index cb2afa57ac..f438468838 100644 --- a/aiida/backends/testbase.py +++ b/aiida/backends/testbase.py @@ -143,7 +143,7 @@ def run_aiida_db_tests(tests_to_run, verbose=False): actually_run_tests = [] num_tests_expected = 0 - + # To avoid adding more than once the same test # (e.g. if you type both db and db.xxx) found_modulenames = set() @@ -160,6 +160,7 @@ def run_aiida_db_tests(tests_to_run, verbose=False): for modulename in modulenames: if modulename not in found_modulenames: + print(modulename) test_suite.addTest(test_loader.loadTestsFromName(modulename)) found_modulenames.add(modulename) From fb182ccb3116173f1a7df46f179a35d003066359 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Thu, 20 Jul 2017 16:32:15 +0200 Subject: [PATCH 014/101] Try moving ArrayData import inside the test --- aiida/backends/testbase.py | 1 - aiida/backends/tests/nodes.py | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/aiida/backends/testbase.py b/aiida/backends/testbase.py index f438468838..7fc497b2cb 100644 --- a/aiida/backends/testbase.py +++ b/aiida/backends/testbase.py @@ -160,7 +160,6 @@ def run_aiida_db_tests(tests_to_run, verbose=False): for modulename in modulenames: if modulename not in found_modulenames: - print(modulename) test_suite.addTest(test_loader.loadTestsFromName(modulename)) found_modulenames.add(modulename) diff --git a/aiida/backends/tests/nodes.py b/aiida/backends/tests/nodes.py index e44aa4cc95..579cddd07e 100644 --- a/aiida/backends/tests/nodes.py +++ b/aiida/backends/tests/nodes.py @@ -18,10 +18,9 @@ from aiida.common.exceptions import ModificationNotAllowed, UniquenessError from aiida.common.links import LinkType from aiida.orm.data import Data -from aiida.orm.data.array import ArrayData from aiida.orm.node import Node from aiida.orm.utils import load_node - +# NOTE: Some AiiDA imports cause problems when placed outside of the tests. class TestNodeHashing(AiidaTestCase): """ @@ -61,6 +60,7 @@ def test_simple_unequal_nodes(self): self.assertNotEquals(n1.uuid, n2.uuid) def test_unequal_arrays(self): + from aiida.orm.data.array import ArrayData arrays = [ (np.zeros(1001), np.zeros(1005)), (np.array([1, 2, 3]), np.array([2, 3, 4])) From dc939620a5c0bd1ec90024134fc86c818f9419ce Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Thu, 20 Jul 2017 16:54:13 +0200 Subject: [PATCH 015/101] Move numpy import to test --- aiida/backends/tests/nodes.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/aiida/backends/tests/nodes.py b/aiida/backends/tests/nodes.py index 579cddd07e..b8047b2b56 100644 --- a/aiida/backends/tests/nodes.py +++ b/aiida/backends/tests/nodes.py @@ -12,15 +12,12 @@ """ import unittest -import numpy as np - from aiida.backends.testbase import AiidaTestCase from aiida.common.exceptions import ModificationNotAllowed, UniquenessError from aiida.common.links import LinkType from aiida.orm.data import Data from aiida.orm.node import Node from aiida.orm.utils import load_node -# NOTE: Some AiiDA imports cause problems when placed outside of the tests. 
class TestNodeHashing(AiidaTestCase): """ @@ -60,6 +57,7 @@ def test_simple_unequal_nodes(self): self.assertNotEquals(n1.uuid, n2.uuid) def test_unequal_arrays(self): + import numpy as np from aiida.orm.data.array import ArrayData arrays = [ (np.zeros(1001), np.zeros(1005)), From 5fef7f2fe1a26c20509ff947278398ada372073f Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Thu, 20 Jul 2017 17:42:57 +0200 Subject: [PATCH 016/101] Add comma after checksumdir requirement --- setup_requirements.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup_requirements.py b/setup_requirements.py index 1461bda0d5..4aa021ffd1 100644 --- a/setup_requirements.py +++ b/setup_requirements.py @@ -23,7 +23,7 @@ 'future', 'pathlib2', 'singledispatch >= 3.4.0.0', - 'checksumdir >= 1.1.4' + 'checksumdir >= 1.1.4', # We need for the time being to stay with an old version # of celery, including the versions of the AMQP libraries below, # because the support for a SQLA broker has been dropped in later From f04d3bb8c44310746ed73815879383b9addbd9d8 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Thu, 20 Jul 2017 17:51:52 +0200 Subject: [PATCH 017/101] Add find_same to store_all, add to SQLAlchemy --- aiida/orm/implementation/django/node.py | 4 ++-- aiida/orm/implementation/sqlalchemy/node.py | 19 ++++++++++++++++--- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/aiida/orm/implementation/django/node.py b/aiida/orm/implementation/django/node.py index c8b0121ff9..927a7ad566 100644 --- a/aiida/orm/implementation/django/node.py +++ b/aiida/orm/implementation/django/node.py @@ -466,7 +466,7 @@ def dbnode(self): # self._dbnode = DbNode.objects.get(pk=self._dbnode.pk) return self._dbnode - def store_all(self, with_transaction=True): + def store_all(self, with_transaction=True, find_same=False): """ Store the node, together with all input links, if cached, and also the linked nodes, if they were not stored yet. @@ -503,7 +503,7 @@ def store_all(self, with_transaction=True): # Always without transaction: either it is the context_man here, # or it is managed outside self._store_input_nodes() - self.store(with_transaction=False) + self.store(with_transaction=False, find_same=find_same) self._store_cached_input_links(with_transaction=False) return self diff --git a/aiida/orm/implementation/sqlalchemy/node.py b/aiida/orm/implementation/sqlalchemy/node.py index f38b4bcd5e..02198e6ef6 100644 --- a/aiida/orm/implementation/sqlalchemy/node.py +++ b/aiida/orm/implementation/sqlalchemy/node.py @@ -509,7 +509,7 @@ def id(self): def dbnode(self): return self._dbnode - def store_all(self, with_transaction=True): + def store_all(self, with_transaction=True, find_same=False): """ Store the node, together with all input links, if cached, and also the linked nodes, if they were not stored yet. @@ -535,7 +535,7 @@ def store_all(self, with_transaction=True): "grandparents or other ancestors".format(parent_node.uuid)) self._store_input_nodes() - self.store(with_transaction=False) + self.store(with_transaction=False, find_same=find_same) self._store_cached_input_links(with_transaction=False) from aiida.backends.sqlalchemy import get_scoped_session session = get_scoped_session() @@ -601,7 +601,7 @@ def _store_cached_input_links(self, with_transaction=True): session.rollback() raise - def store(self, with_transaction=True): + def store(self, with_transaction=True, find_same=False): """ Store a new node in the DB, also saving its repository directory and attributes. 
@@ -617,6 +617,8 @@ def store(self, with_transaction=True): :parameter with_transaction: if False, no transaction is used. This is meant to be used ONLY if the outer calling function has already a transaction open! + + :param bool find_same: Whether I attempt to find an equal node in the DB. """ from aiida.backends.sqlalchemy import get_scoped_session session = get_scoped_session() @@ -635,6 +637,17 @@ def store(self, with_transaction=True): # I assume that if a node exists in the DB, its folder is in place. # On the other hand, periodically the user might need to run some # bookkeeping utility to check for lone folders. + + # For node hashing, if find_same is true: + if find_same: + same_node = self.get_same_node() + if same_node is not None: + #~ pass + self._dbnode = same_node.dbnode + self._to_be_stored = False + self._repo_folder = same_node._repo_folder + return self + self._repository_folder.replace_with_folder( self._get_temp_folder().abspath, move=True, overwrite=True) From 1bc7c90b38c22fb7e9338f5423653c77846393a2 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Thu, 20 Jul 2017 18:10:57 +0200 Subject: [PATCH 018/101] Simplify get_same_node --- aiida/orm/implementation/django/node.py | 1 - aiida/orm/implementation/general/node.py | 10 +++------- aiida/orm/implementation/sqlalchemy/node.py | 1 - 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/aiida/orm/implementation/django/node.py b/aiida/orm/implementation/django/node.py index 927a7ad566..54034e4902 100644 --- a/aiida/orm/implementation/django/node.py +++ b/aiida/orm/implementation/django/node.py @@ -613,7 +613,6 @@ def store(self, with_transaction=True, find_same=False): if find_same: same_node = self.get_same_node() if same_node is not None: - #~ pass self._dbnode = same_node.dbnode self._to_be_stored = False self._repo_folder = same_node._repo_folder diff --git a/aiida/orm/implementation/general/node.py b/aiida/orm/implementation/general/node.py index 8f485dad39..3887df76c7 100644 --- a/aiida/orm/implementation/general/node.py +++ b/aiida/orm/implementation/general/node.py @@ -1522,13 +1522,9 @@ def get_same_node(self): qb = QueryBuilder() qb.append(self.__class__, filters={'extras.hash':hash_}, project='*', subclassing=False) same_node = qb.first() - else: - same_node = None - - if same_node: - return same_node[0] - else: - return None + if same_node: + return same_node[0] + return None @property def out(self): diff --git a/aiida/orm/implementation/sqlalchemy/node.py b/aiida/orm/implementation/sqlalchemy/node.py index 02198e6ef6..f6efae9650 100644 --- a/aiida/orm/implementation/sqlalchemy/node.py +++ b/aiida/orm/implementation/sqlalchemy/node.py @@ -642,7 +642,6 @@ def store(self, with_transaction=True, find_same=False): if find_same: same_node = self.get_same_node() if same_node is not None: - #~ pass self._dbnode = same_node.dbnode self._to_be_stored = False self._repo_folder = same_node._repo_folder From c5df6bf6c54dade644304bd51b5df34dbc18a9e6 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Thu, 20 Jul 2017 18:13:10 +0200 Subject: [PATCH 019/101] Set hash extra on SQLAlchemy nodes --- aiida/orm/implementation/sqlalchemy/node.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/aiida/orm/implementation/sqlalchemy/node.py b/aiida/orm/implementation/sqlalchemy/node.py index f6efae9650..a9addaaa09 100644 --- a/aiida/orm/implementation/sqlalchemy/node.py +++ b/aiida/orm/implementation/sqlalchemy/node.py @@ -702,6 +702,9 @@ def store(self, with_transaction=True, find_same=False): g = 
Group.get_or_create(name=group_name, type_string=grouptype)[0] g.add_nodes(self) + from aiida.backends.sqlalchemy.db.models import DbExtra + # Store the hash without cleaning and without incrementing the nodeversion number + DbExtra.set_value_for_node(self.dbnode, 'hash', self.get_hash()) return self @property From e5c72a186600b6c8407427f879ea03794a269a99 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Thu, 20 Jul 2017 18:15:55 +0200 Subject: [PATCH 020/101] Change how extra is stored on SQLAlchemy node --- aiida/orm/implementation/sqlalchemy/node.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/aiida/orm/implementation/sqlalchemy/node.py b/aiida/orm/implementation/sqlalchemy/node.py index a9addaaa09..61d39d5972 100644 --- a/aiida/orm/implementation/sqlalchemy/node.py +++ b/aiida/orm/implementation/sqlalchemy/node.py @@ -702,9 +702,7 @@ def store(self, with_transaction=True, find_same=False): g = Group.get_or_create(name=group_name, type_string=grouptype)[0] g.add_nodes(self) - from aiida.backends.sqlalchemy.db.models import DbExtra - # Store the hash without cleaning and without incrementing the nodeversion number - DbExtra.set_value_for_node(self.dbnode, 'hash', self.get_hash()) + self.dbnode.set_extra('hash', self.get_hash()) return self @property From a896eb102d983742f3b27f050dea9fb389a02c81 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Thu, 20 Jul 2017 19:44:56 +0200 Subject: [PATCH 021/101] Use Folder interface for hashing the repository folder --- aiida/common/hashing.py | 34 +++++++++++++----------- aiida/orm/implementation/general/node.py | 2 +- setup_requirements.py | 1 - 3 files changed, 20 insertions(+), 17 deletions(-) diff --git a/aiida/common/hashing.py b/aiida/common/hashing.py index 46b5c109f8..e723a91b72 100644 --- a/aiida/common/hashing.py +++ b/aiida/common/hashing.py @@ -18,12 +18,12 @@ from functools import singledispatch from collections import abc except ImportError: # Python2 - import pathlib2 as pathlib from singledispatch import singledispatch import collections as abc import numpy as np -import checksumdir + +from .folders import Folder """ Here we define a single password hashing instance for the full AiiDA. 
@@ -198,9 +198,9 @@ def make_hash(object_to_hash): ) @make_hash.register(abc.Sequence) -def _(object_to_hash): +def _(sequence): hashes = tuple([ - make_hash(x) for x in object_to_hash + make_hash(x) for x in sequence ]) return make_hash_with_type('L', ",".join(hashes)) @@ -214,11 +214,11 @@ def _(object_to_hash): return make_hash_with_type('S', ",".join(hashes)) @make_hash.register(abc.Mapping) -def _(object_to_hash): +def _(mapping): hashed_dictionary = { k: make_hash(v) for k,v - in object_to_hash.items() + in mapping.items() } return make_hash_with_type( 'D', @@ -262,15 +262,19 @@ def _(object_to_hash): def _(object_to_hash): return make_hash_with_type('d', str(object_to_hash)) -@make_hash.register(pathlib.Path) -def _(object_to_hash): - try: - return make_hash_with_type( - 'pd', - checksumdir.dirhash(str(object_to_hash)) - ) - except TypeError: - raise ValueError('Cannot hash pathlib.Path unless it is a directory.') +@make_hash.register(Folder) +def _(folder): + return make_hash_with_type( + 'pd', + make_hash([ + ( + name, + folder.get_subfolder(name) if folder.isdir(name) else + make_hash_with_type('pf', folder.open(name).read()) + ) + for name in sorted(folder.get_content_list()) + ]) + ) @make_hash.register(np.ndarray) def _(object_to_hash): diff --git a/aiida/orm/implementation/general/node.py b/aiida/orm/implementation/general/node.py index 3887df76c7..6ba6e4ea5d 100644 --- a/aiida/orm/implementation/general/node.py +++ b/aiida/orm/implementation/general/node.py @@ -1509,7 +1509,7 @@ def get_hash(self): try: return make_hash([ self.get_attrs(), - pathlib.Path(self.get_abs_path()) + self.folder ]) except: return None diff --git a/setup_requirements.py b/setup_requirements.py index 4aa021ffd1..2f68d9e49e 100644 --- a/setup_requirements.py +++ b/setup_requirements.py @@ -23,7 +23,6 @@ 'future', 'pathlib2', 'singledispatch >= 3.4.0.0', - 'checksumdir >= 1.1.4', # We need for the time being to stay with an old version # of celery, including the versions of the AMQP libraries below, # because the support for a SQLA broker has been dropped in later From d13f8dd37ac7fb60eb5f31feacf3f1fb3aeeae7a Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Thu, 20 Jul 2017 19:55:12 +0200 Subject: [PATCH 022/101] Add logic to ignore attributes --- aiida/orm/implementation/general/node.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/aiida/orm/implementation/general/node.py b/aiida/orm/implementation/general/node.py index 6ba6e4ea5d..dd16f775b7 100644 --- a/aiida/orm/implementation/general/node.py +++ b/aiida/orm/implementation/general/node.py @@ -144,6 +144,9 @@ def __new__(cls, name, bases, attrs): # See documentation in the set() method. _set_incompatibilities = [] + # A list of attribute names that will be ignored when creating the hash. + _hash_ignored_attributes = [] + def get_desc(self): """ Returns a string with infos retrieved from a node's @@ -1508,7 +1511,10 @@ def get_hash(self): from aiida.common.hashing import make_hash try: return make_hash([ - self.get_attrs(), + { + key: val for key, val in self.get_attrs().items() + if key not in self._hash_ignored_attributes + }, self.folder ]) except: From 533e9684851dd3954e6c4ce6d6bd4f891af2401b Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Thu, 20 Jul 2017 22:08:48 +0200 Subject: [PATCH 023/101] Add tests for FolderData with empty files and folders. Close files after reading them to create the hash. 
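A FolderData containing only an empty file 'path/name' and one containing only an empty subfolder 'path/name' give identical content listings, so the hash must record for each entry whether it is a file or a directory; this is what the 'pd' / 'pf' type prefixes in make_hash are for. Roughly, the structure being hashed corresponds to the following sketch (folder_fingerprint is illustrative only and not part of this change):

    def folder_fingerprint(folder):
        # Recursively collect (name, kind, content) triples; the explicit
        # 'kind' marker keeps an empty file apart from an empty subfolder.
        entries = []
        for name in sorted(folder.get_content_list()):
            if folder.isdir(name):
                entries.append(
                    (name, 'dir', folder_fingerprint(folder.get_subfolder(name))))
            else:
                # Read within a with-block so the file is closed afterwards.
                with folder.open(name) as handle:
                    entries.append((name, 'file', handle.read()))
        return entries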
--- aiida/backends/tests/nodes.py | 39 +++++++++++++++++++++++++++++++++++ aiida/common/hashing.py | 7 ++++++- 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/aiida/backends/tests/nodes.py b/aiida/backends/tests/nodes.py index b8047b2b56..1547cb25fa 100644 --- a/aiida/backends/tests/nodes.py +++ b/aiida/backends/tests/nodes.py @@ -44,6 +44,45 @@ def test_simple_equal_nodes(self): self.assertEqual(n1.uuid, n2.uuid) self.assertEqual(n1.folder.get_abs_path('.'), n2.folder.get_abs_path('.')) + @staticmethod + def create_folderdata_with_empty_file(): + from aiida.orm.data.folder import FolderData + res = FolderData() + with res.folder.get_subfolder('path').open('name', 'w') as f: + pass + return res + + @staticmethod + def create_folderdata_with_empty_folder(): + from aiida.orm.data.folder import FolderData + res = FolderData() + res.folder.get_subfolder('path/name').create() + return res + + def test_folder_file_different(self): + f1 = self.create_folderdata_with_empty_file() + f2 = self.create_folderdata_with_empty_folder() + + assert ( + f1.folder.get_subfolder('path').get_content_list() == + f2.folder.get_subfolder('path').get_content_list() + ) + assert f1.get_hash() != f2.get_hash() + + def test_folder_same(self): + f1 = self.create_folderdata_with_empty_folder() + f2 = self.create_folderdata_with_empty_folder() + f1.store() + f2.store(find_same=True) + assert f1.uuid == f2.uuid + + def test_file_same(self): + f1 = self.create_folderdata_with_empty_file() + f2 = self.create_folderdata_with_empty_file() + f1.store() + f2.store(find_same=True) + assert f1.uuid == f2.uuid + def test_simple_unequal_nodes(self): attributes = [ [(1.0, 1.1, 1.2), (2.0, 1.1, 1.2)], diff --git a/aiida/common/hashing.py b/aiida/common/hashing.py index e723a91b72..d9bd56a65d 100644 --- a/aiida/common/hashing.py +++ b/aiida/common/hashing.py @@ -264,13 +264,18 @@ def _(object_to_hash): @make_hash.register(Folder) def _(folder): + # make sure file is closed after being read + def _read_file(folder, name): + with folder.open(name) as f: + return f.read() + return make_hash_with_type( 'pd', make_hash([ ( name, folder.get_subfolder(name) if folder.isdir(name) else - make_hash_with_type('pf', folder.open(name).read()) + make_hash_with_type('pf', _read_file(folder, name)) ) for name in sorted(folder.get_content_list()) ]) From 55d1ad0c41286786787e4f2f405a9eca4f0def82 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Fri, 21 Jul 2017 10:01:19 +0200 Subject: [PATCH 024/101] Add functionality to set the default of caching True / False --- aiida/backends/tests/nodes.py | 26 ++++++++-- aiida/common/_caching_defaults.py | 54 +++++++++++++++++++++ aiida/common/caching.py | 4 ++ aiida/orm/implementation/django/node.py | 13 ++--- aiida/orm/implementation/sqlalchemy/node.py | 13 ++--- 5 files changed, 93 insertions(+), 17 deletions(-) create mode 100644 aiida/common/_caching_defaults.py create mode 100644 aiida/common/caching.py diff --git a/aiida/backends/tests/nodes.py b/aiida/backends/tests/nodes.py index 1547cb25fa..1d69f7a8e2 100644 --- a/aiida/backends/tests/nodes.py +++ b/aiida/backends/tests/nodes.py @@ -15,6 +15,7 @@ from aiida.backends.testbase import AiidaTestCase from aiida.common.exceptions import ModificationNotAllowed, UniquenessError from aiida.common.links import LinkType +from aiida.common import caching from aiida.orm.data import Data from aiida.orm.node import Node from aiida.orm.utils import load_node @@ -39,8 +40,23 @@ def test_simple_equal_nodes(self): for attr in attributes: n1 = 
self.create_simple_node(*attr) n2 = self.create_simple_node(*attr) + n1.store(use_cache=True) + n2.store(use_cache=True) + self.assertEqual(n1.uuid, n2.uuid) + self.assertEqual(n1.folder.get_abs_path('.'), n2.folder.get_abs_path('.')) + + def test_set_default(self): + attributes = [ + (1.0, 2, 3), + ({'a': 'b', 'c': 'd'}, [1, 2, 3], {4, 2, 2}) + ] + for attr in attributes: + n1 = self.create_simple_node(*attr) + n2 = self.create_simple_node(*attr) + caching.defaults.use_cache = True n1.store() - n2.store(find_same=True) + n2.store() + caching.defaults.use_cache = False self.assertEqual(n1.uuid, n2.uuid) self.assertEqual(n1.folder.get_abs_path('.'), n2.folder.get_abs_path('.')) @@ -73,14 +89,14 @@ def test_folder_same(self): f1 = self.create_folderdata_with_empty_folder() f2 = self.create_folderdata_with_empty_folder() f1.store() - f2.store(find_same=True) + f2.store(use_cache=True) assert f1.uuid == f2.uuid def test_file_same(self): f1 = self.create_folderdata_with_empty_file() f2 = self.create_folderdata_with_empty_file() f1.store() - f2.store(find_same=True) + f2.store(use_cache=True) assert f1.uuid == f2.uuid def test_simple_unequal_nodes(self): @@ -92,7 +108,7 @@ def test_simple_unequal_nodes(self): n1 = self.create_simple_node(*attr1) n2 = self.create_simple_node(*attr2) n1.store() - n2.store(find_same=True) + n2.store(use_cache=True) self.assertNotEquals(n1.uuid, n2.uuid) def test_unequal_arrays(self): @@ -111,7 +127,7 @@ def create_arraydata(arr): a1 = create_arraydata(arr1) a1.store() a2 = create_arraydata(arr2) - a2.store(find_same=True) + a2.store(use_cache=True) self.assertNotEquals(a1.uuid, a2.uuid) class TestDataNode(AiidaTestCase): diff --git a/aiida/common/_caching_defaults.py b/aiida/common/_caching_defaults.py new file mode 100644 index 0000000000..60177eadf3 --- /dev/null +++ b/aiida/common/_caching_defaults.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +Defines the default values for using the caching mechanism. The values are mutable, meaning they have the following behavior: + +.. code :: python + >>> x = defaults.use_cache + >>> print(x) + False + >>> defaults.use_cache = True + >>> print(x) + True +""" + +__all__ = ['defaults'] + +class _MutableBool(object): + def __init__(self, value): + self.value = value + + def get(self): + return self.value + + def set(self, value): + self.value = value + + def __nonzero__(self): + return self.__bool__() + + def __bool__(self): + return self.value + + def __str__(self): + return str(bool(self)) + +class _Defaults(object): + """ + A class containing the default values for caching. 
+ """ + __slots__ = ['_use_cache'] + + def __init__(self): + self._use_cache = _MutableBool(False) + + @property + def use_cache(self): + return self._use_cache + + @use_cache.setter + def use_cache(self, value): + self._use_cache.set(value) + +defaults = _Defaults() diff --git a/aiida/common/caching.py b/aiida/common/caching.py new file mode 100644 index 0000000000..6d38464157 --- /dev/null +++ b/aiida/common/caching.py @@ -0,0 +1,4 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from ._caching_defaults import defaults diff --git a/aiida/orm/implementation/django/node.py b/aiida/orm/implementation/django/node.py index 54034e4902..8a1dd07307 100644 --- a/aiida/orm/implementation/django/node.py +++ b/aiida/orm/implementation/django/node.py @@ -21,6 +21,7 @@ from aiida.common.folders import RepositoryFolder from aiida.common.links import LinkType from aiida.common.utils import get_new_uuid +from aiida.common import caching from aiida.orm.implementation.general.node import AbstractNode, _NO_DEFAULT from aiida.orm.mixins import Sealable # from aiida.orm.implementation.django.utils import get_db_columns @@ -466,7 +467,7 @@ def dbnode(self): # self._dbnode = DbNode.objects.get(pk=self._dbnode.pk) return self._dbnode - def store_all(self, with_transaction=True, find_same=False): + def store_all(self, with_transaction=True, use_cache=caching.defaults.use_cache): """ Store the node, together with all input links, if cached, and also the linked nodes, if they were not stored yet. @@ -503,7 +504,7 @@ def store_all(self, with_transaction=True, find_same=False): # Always without transaction: either it is the context_man here, # or it is managed outside self._store_input_nodes() - self.store(with_transaction=False, find_same=find_same) + self.store(with_transaction=False, use_cache=use_cache) self._store_cached_input_links(with_transaction=False) return self @@ -558,7 +559,7 @@ def _store_cached_input_links(self, with_transaction=True): # would have been raised, and the following lines are not executed) self._inputlinks_cache.clear() - def store(self, with_transaction=True, find_same=False): + def store(self, with_transaction=True, use_cache=caching.defaults.use_cache): """ Store a new node in the DB, also saving its repository directory and attributes. @@ -575,7 +576,7 @@ def store(self, with_transaction=True, find_same=False): is meant to be used ONLY if the outer calling function has already a transaction open! - :param bool find_same: Whether I attempt to find an equal node in the DB. + :param bool use_cache: Whether I attempt to find an equal node in the DB. """ # TODO: This needs to be generalized, allowing for flexible methods # for storing data and its attributes. @@ -609,8 +610,8 @@ def store(self, with_transaction=True, find_same=False): # bookkeeping utility to check for lone folders. 
- # For node hashing, if find_same is true: - if find_same: + # For node hashing, if use_cache is true: + if use_cache: same_node = self.get_same_node() if same_node is not None: self._dbnode = same_node.dbnode diff --git a/aiida/orm/implementation/sqlalchemy/node.py b/aiida/orm/implementation/sqlalchemy/node.py index 61d39d5972..4c6efb72ea 100644 --- a/aiida/orm/implementation/sqlalchemy/node.py +++ b/aiida/orm/implementation/sqlalchemy/node.py @@ -28,6 +28,7 @@ NotExistent, UniquenessError, ValidationError) from aiida.common.links import LinkType +from aiida.common import caching from aiida.orm.implementation.general.node import AbstractNode, _NO_DEFAULT from aiida.orm.implementation.sqlalchemy.computer import Computer @@ -509,7 +510,7 @@ def id(self): def dbnode(self): return self._dbnode - def store_all(self, with_transaction=True, find_same=False): + def store_all(self, with_transaction=True, use_cache=caching.defaults.use_cache): """ Store the node, together with all input links, if cached, and also the linked nodes, if they were not stored yet. @@ -535,7 +536,7 @@ def store_all(self, with_transaction=True, find_same=False): "grandparents or other ancestors".format(parent_node.uuid)) self._store_input_nodes() - self.store(with_transaction=False, find_same=find_same) + self.store(with_transaction=False, use_cache=use_cache) self._store_cached_input_links(with_transaction=False) from aiida.backends.sqlalchemy import get_scoped_session session = get_scoped_session() @@ -601,7 +602,7 @@ def _store_cached_input_links(self, with_transaction=True): session.rollback() raise - def store(self, with_transaction=True, find_same=False): + def store(self, with_transaction=True, use_cache=caching.defaults.use_cache): """ Store a new node in the DB, also saving its repository directory and attributes. @@ -618,7 +619,7 @@ def store(self, with_transaction=True, find_same=False): is meant to be used ONLY if the outer calling function has already a transaction open! - :param bool find_same: Whether I attempt to find an equal node in the DB. + :param bool use_cache: Whether I attempt to find an equal node in the DB. """ from aiida.backends.sqlalchemy import get_scoped_session session = get_scoped_session() @@ -638,8 +639,8 @@ def store(self, with_transaction=True, find_same=False): # On the other hand, periodically the user might need to run some # bookkeeping utility to check for lone folders. 
- # For node hashing, if find_same is true: - if find_same: + # For node hashing, if use_cache is true: + if use_cache: same_node = self.get_same_node() if same_node is not None: self._dbnode = same_node.dbnode From 7a0d4efbf2da476cbd41f2765dc8442a26a60f0a Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Fri, 21 Jul 2017 11:00:39 +0200 Subject: [PATCH 025/101] Add contextmanager to enable / disable caching --- aiida/backends/tests/nodes.py | 7 +++---- aiida/common/_caching_defaults.py | 20 ++++++++++++++++---- aiida/common/caching.py | 2 +- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/aiida/backends/tests/nodes.py b/aiida/backends/tests/nodes.py index 1d69f7a8e2..f782028fc4 100644 --- a/aiida/backends/tests/nodes.py +++ b/aiida/backends/tests/nodes.py @@ -53,10 +53,9 @@ def test_set_default(self): for attr in attributes: n1 = self.create_simple_node(*attr) n2 = self.create_simple_node(*attr) - caching.defaults.use_cache = True - n1.store() - n2.store() - caching.defaults.use_cache = False + with caching.EnableCaching(): + n1.store() + n2.store() self.assertEqual(n1.uuid, n2.uuid) self.assertEqual(n1.folder.get_abs_path('.'), n2.folder.get_abs_path('.')) diff --git a/aiida/common/_caching_defaults.py b/aiida/common/_caching_defaults.py index 60177eadf3..50765d19c6 100644 --- a/aiida/common/_caching_defaults.py +++ b/aiida/common/_caching_defaults.py @@ -13,7 +13,7 @@ True """ -__all__ = ['defaults'] +__all__ = ['defaults', 'EnableCaching'] class _MutableBool(object): def __init__(self, value): @@ -38,8 +38,6 @@ class _Defaults(object): """ A class containing the default values for caching. """ - __slots__ = ['_use_cache'] - def __init__(self): self._use_cache = _MutableBool(False) @@ -49,6 +47,20 @@ def use_cache(self): @use_cache.setter def use_cache(self, value): - self._use_cache.set(value) + self._use_cache.set(bool(value)) defaults = _Defaults() + +class EnableCaching(object): + """ + Context manager to temporarily set the caching default. The previous value will be restored upon exiting. + """ + def __init__(self, value=True): + self.value = value + + def __enter__(self): + self.previous_value = bool(defaults.use_cache) + defaults.use_cache = self.value + + def __exit__(self, exc_type, exc_val, exc_tb): + defaults.use_cache = self.previous_value diff --git a/aiida/common/caching.py b/aiida/common/caching.py index 6d38464157..56ea833d23 100644 --- a/aiida/common/caching.py +++ b/aiida/common/caching.py @@ -1,4 +1,4 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -from ._caching_defaults import defaults +from ._caching_defaults import defaults, EnableCaching From 148770df25dcbd2b66d14a2d276dac961f0b2c8a Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Fri, 21 Jul 2017 13:55:51 +0200 Subject: [PATCH 026/101] Change caching default in store methods to False. The caching.defaults.use_cache parameter should be used by plugin developers to mark whether a specific .store() call CAN actually use caching. The user decides whether to use it in the end by setting the default to True / False. 
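A sketch of the intended usage; 'node' and 'other_node' stand for any unstored Node instances:

    from aiida.common import caching

    # Plugin code: forward the global default instead of hard-coding it.
    node.store(use_cache=caching.defaults.use_cache)

    # User code, option 1: opt in globally.
    caching.defaults.use_cache = True

    # User code, option 2: opt in only temporarily, using the context
    # manager from the previous commit.
    with caching.EnableCaching():
        other_node.store(use_cache=caching.defaults.use_cache)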
--- aiida/backends/tests/nodes.py | 4 ++-- aiida/orm/implementation/django/node.py | 5 ++--- aiida/orm/implementation/sqlalchemy/node.py | 5 ++--- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/aiida/backends/tests/nodes.py b/aiida/backends/tests/nodes.py index f782028fc4..3fcd79e2ab 100644 --- a/aiida/backends/tests/nodes.py +++ b/aiida/backends/tests/nodes.py @@ -54,8 +54,8 @@ def test_set_default(self): n1 = self.create_simple_node(*attr) n2 = self.create_simple_node(*attr) with caching.EnableCaching(): - n1.store() - n2.store() + n1.store(use_cache=caching.defaults.use_cache) + n2.store(use_cache=caching.defaults.use_cache) self.assertEqual(n1.uuid, n2.uuid) self.assertEqual(n1.folder.get_abs_path('.'), n2.folder.get_abs_path('.')) diff --git a/aiida/orm/implementation/django/node.py b/aiida/orm/implementation/django/node.py index 8a1dd07307..f651dbad44 100644 --- a/aiida/orm/implementation/django/node.py +++ b/aiida/orm/implementation/django/node.py @@ -21,7 +21,6 @@ from aiida.common.folders import RepositoryFolder from aiida.common.links import LinkType from aiida.common.utils import get_new_uuid -from aiida.common import caching from aiida.orm.implementation.general.node import AbstractNode, _NO_DEFAULT from aiida.orm.mixins import Sealable # from aiida.orm.implementation.django.utils import get_db_columns @@ -467,7 +466,7 @@ def dbnode(self): # self._dbnode = DbNode.objects.get(pk=self._dbnode.pk) return self._dbnode - def store_all(self, with_transaction=True, use_cache=caching.defaults.use_cache): + def store_all(self, with_transaction=True, use_cache=False): """ Store the node, together with all input links, if cached, and also the linked nodes, if they were not stored yet. @@ -559,7 +558,7 @@ def _store_cached_input_links(self, with_transaction=True): # would have been raised, and the following lines are not executed) self._inputlinks_cache.clear() - def store(self, with_transaction=True, use_cache=caching.defaults.use_cache): + def store(self, with_transaction=True, use_cache=False): """ Store a new node in the DB, also saving its repository directory and attributes. diff --git a/aiida/orm/implementation/sqlalchemy/node.py b/aiida/orm/implementation/sqlalchemy/node.py index 4c6efb72ea..68f6ae5b1a 100644 --- a/aiida/orm/implementation/sqlalchemy/node.py +++ b/aiida/orm/implementation/sqlalchemy/node.py @@ -28,7 +28,6 @@ NotExistent, UniquenessError, ValidationError) from aiida.common.links import LinkType -from aiida.common import caching from aiida.orm.implementation.general.node import AbstractNode, _NO_DEFAULT from aiida.orm.implementation.sqlalchemy.computer import Computer @@ -510,7 +509,7 @@ def id(self): def dbnode(self): return self._dbnode - def store_all(self, with_transaction=True, use_cache=caching.defaults.use_cache): + def store_all(self, with_transaction=True, use_cache=False): """ Store the node, together with all input links, if cached, and also the linked nodes, if they were not stored yet. @@ -602,7 +601,7 @@ def _store_cached_input_links(self, with_transaction=True): session.rollback() raise - def store(self, with_transaction=True, use_cache=caching.defaults.use_cache): + def store(self, with_transaction=True, use_cache=False): """ Store a new node in the DB, also saving its repository directory and attributes. 
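A side note on why a plain `use_cache=False` in the signature plus explicit forwarding (as in the test hunk above) is more robust than putting `caching.defaults.use_cache` directly into the signature: Python evaluates default arguments once, at definition time, so a simple boolean default would never see later changes. That is also why the earlier patch needed the mutable wrapper in the first place. A small illustration:

    FLAG = False

    def f(value=FLAG):
        # FLAG was evaluated when 'def' ran; rebinding it later is invisible here.
        return value

    FLAG = True
    assert f() is False     # stale definition-time default
    assert f(FLAG) is True  # forwarding at call time picks up the change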
From a3bc4b5610d639338063ae7e8a780e700ad599da Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Fri, 21 Jul 2017 15:35:17 +0200 Subject: [PATCH 027/101] Add module.__version__ to hash of a Node --- aiida/orm/implementation/general/node.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/aiida/orm/implementation/general/node.py b/aiida/orm/implementation/general/node.py index dd16f775b7..a091ed71d0 100644 --- a/aiida/orm/implementation/general/node.py +++ b/aiida/orm/implementation/general/node.py @@ -10,10 +10,11 @@ from abc import ABCMeta, abstractmethod, abstractproperty from aiida.common.utils import abstractclassmethod -import collections -import logging import os import types +import logging +import importlib +import collections try: import pathlib except ImportError: @@ -1511,6 +1512,9 @@ def get_hash(self): from aiida.common.hashing import make_hash try: return make_hash([ + importlib.import_module( + self.__module__.split('.', 1)[0] + ).__version__, { key: val for key, val in self.get_attrs().items() if key not in self._hash_ignored_attributes From bfad737de4ca4614d83bb28ad64e9aa6068cf12e Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Wed, 2 Aug 2017 15:45:43 +0200 Subject: [PATCH 028/101] Add _is_valid_cache, to exclude failed calculations --- aiida/backends/tests/work/workfunction.py | 10 ++++++++++ aiida/orm/implementation/general/calculation/work.py | 2 +- aiida/orm/implementation/general/node.py | 9 ++++++--- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/aiida/backends/tests/work/workfunction.py b/aiida/backends/tests/work/workfunction.py index 2c69e3dedb..c4d8799ec8 100644 --- a/aiida/backends/tests/work/workfunction.py +++ b/aiida/backends/tests/work/workfunction.py @@ -12,6 +12,7 @@ from aiida.backends.testbase import AiidaTestCase from aiida.work.workfunction import workfunction from aiida.orm.data.base import get_true_node +from aiida.orm import load_node from aiida.work.run import async, run import aiida.work.util as util @@ -49,3 +50,12 @@ def test_async(self): def test_run(self): self.assertTrue(run(simple_wf)['result']) self.assertTrue(run(return_input, get_true_node())['result']) + + def test_caching(self): + r, pid = run(simple_wf, _return_pid=True) + print(type(r)) + print(r) + print(pid) + n = load_node(pid) + print(n) + print(type(n)) diff --git a/aiida/orm/implementation/general/calculation/work.py b/aiida/orm/implementation/general/calculation/work.py index c212b0de71..3253a7a31c 100644 --- a/aiida/orm/implementation/general/calculation/work.py +++ b/aiida/orm/implementation/general/calculation/work.py @@ -39,4 +39,4 @@ def has_failed(self): :return: True if the calculation has failed, False otherwise. :rtype: bool """ - return self.get_attr(self.FAILED_KEY, False) is not False \ No newline at end of file + return self.get_attr(self.FAILED_KEY, False) is not False diff --git a/aiida/orm/implementation/general/node.py b/aiida/orm/implementation/general/node.py index a091ed71d0..9da74fea4f 100644 --- a/aiida/orm/implementation/general/node.py +++ b/aiida/orm/implementation/general/node.py @@ -148,6 +148,9 @@ def __new__(cls, name, bases, attrs): # A list of attribute names that will be ignored when creating the hash. 
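How the version lookup added in PATCH 027 resolves its value: the top-level package name is split off the class's module path and imported, and its __version__ joins the objects to hash, so upgrading the package invalidates old hashes. A sketch of just that step (get_package_version is a hypothetical helper, not AiiDA API):

    import importlib

    def get_package_version(obj):
        """Return __version__ of the top-level package defining obj's class."""
        top_level = type(obj).__module__.split('.', 1)[0]
        return importlib.import_module(top_level).__version__

    # For a Node subclass defined under 'aiida.orm....' this imports 'aiida'
    # and returns aiida.__version__.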
_hash_ignored_attributes = []

+    # An attribute / property that defines whether the node should be used as cache
+    _is_valid_cache = True
+
     def get_desc(self):
         """
         Returns a string with infos retrieved from a node's
@@ -1531,9 +1534,9 @@ def get_same_node(self):
         if hash_:
             qb = QueryBuilder()
             qb.append(self.__class__, filters={'extras.hash':hash_}, project='*', subclassing=False)
-            same_node = qb.first()
-            if same_node:
-                return same_node[0]
+            for same_node, in qb.iterall():
+                if same_node._is_valid_cache:
+                    return same_node
             return None

 @property

From 6f37dddebcef994c6a9ce967aca10d370ca80639 Mon Sep 17 00:00:00 2001
From: Dominik Gresch
Date: Mon, 7 Aug 2017 10:45:01 +0200
Subject: [PATCH 029/101] Add debug statements in subclass run_until_complete

---
 aiida/backends/tests/work/workfunction.py | 12 +++++-------
 aiida/work/process.py | 10 ++++++++--
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/aiida/backends/tests/work/workfunction.py b/aiida/backends/tests/work/workfunction.py
index c4d8799ec8..c500dde1f2 100644
--- a/aiida/backends/tests/work/workfunction.py
+++ b/aiida/backends/tests/work/workfunction.py
@@ -15,6 +15,7 @@
 from aiida.orm import load_node
 from aiida.work.run import async, run
 import aiida.work.util as util
+from aiida.common import caching

@@ -52,10 +53,7 @@ def test_run(self):
         self.assertTrue(run(return_input, get_true_node())['result'])

     def test_caching(self):
-        r, pid = run(simple_wf, _return_pid=True)
-        print(type(r))
-        print(r)
-        print(pid)
-        n = load_node(pid)
-        print(n)
-        print(type(n))
+        with caching.EnableCaching():
+            r, pid = run(simple_wf, _return_pid=True)
+            r2, pid2 = run(simple_wf, _return_pid=True)
+            self.assertEqual(pid, pid2)

diff --git a/aiida/work/process.py b/aiida/work/process.py
index c8b9683d2f..3b44e1eb88 100644
--- a/aiida/work/process.py
+++ b/aiida/work/process.py
@@ -25,6 +25,7 @@
 import aiida.common.exceptions as exceptions
 from aiida.common.lang import override, protected
 from aiida.common.links import LinkType
+from aiida.common import caching
 from aiida.utils.calculation import add_source_info
 from aiida.work.defaults import class_loader
 import aiida.work.util
@@ -44,7 +45,7 @@ def __call__(self, value):
         """
         Call this to validate the value against the schema.

-        :param value: a regular dictionary or a ParameterData instance
+        :param value: a regular dictionary or a ParameterData instance
         :return: tuple (success, msg). success is True if the value is valid
         and False otherwise, in which case msg will contain information about
         the validation failure.
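The get_same_node rework in the hunk above no longer trusts the bare first query result: it walks all nodes whose stored hash extra matches and returns the first one that qualifies as a valid cache. The same selection logic in standalone form (plain dicts standing in for stored nodes):

    def get_same_node(candidates, target_hash):
        for node in candidates:
            if node['hash'] == target_hash and node['is_valid_cache']:
                return node
        return None

    nodes = [
        {'hash': 'abc', 'is_valid_cache': False},  # e.g. a failed calculation
        {'hash': 'abc', 'is_valid_cache': True},
    ]
    assert get_same_node(nodes, 'abc') is nodes[1]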
@@ -314,11 +315,16 @@ def report(self, msg, *args, **kwargs): # del parsed[name] # return parsed + def run_until_complete(self): + with open('/home/greschd/Desktop/foo.txt', 'a') as f: + f.write(str(type(self).__mro__) + '\n') + super(Process, self).run_until_complete() + def _create_and_setup_db_record(self): self._calc = self.create_db_record() self._setup_db_record() if self.inputs._store_provenance: - self.calc.store_all() + self.calc.store_all(use_cache=caching.defaults.use_cache) if self.calc.pk is not None: return self.calc.pk From 06acb5c4ca430254066fa0ee35774b5d86f05112 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Mon, 14 Aug 2017 16:16:25 +0200 Subject: [PATCH 030/101] Implement fast-forwarding in _create_and_setup_db_record --- aiida/backends/tests/work/workfunction.py | 11 +++++++--- aiida/work/process.py | 26 +++++++++++------------ 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/aiida/backends/tests/work/workfunction.py b/aiida/backends/tests/work/workfunction.py index c500dde1f2..80ac15d623 100644 --- a/aiida/backends/tests/work/workfunction.py +++ b/aiida/backends/tests/work/workfunction.py @@ -11,7 +11,7 @@ import plum.process_monitor from aiida.backends.testbase import AiidaTestCase from aiida.work.workfunction import workfunction -from aiida.orm.data.base import get_true_node +from aiida.orm.data.base import get_true_node, Int from aiida.orm import load_node from aiida.work.run import async, run import aiida.work.util as util @@ -53,7 +53,12 @@ def test_run(self): self.assertTrue(run(return_input, get_true_node())['result']) def test_caching(self): + @workfunction + def simple_cached_wf(inp): + return {'result': inp} + with caching.EnableCaching(): - r, pid = run(simple_wf, _return_pid=True) - r2, pid2 = run(simple_wf, _return_pid=True) + r, pid = run(simple_cached_wf, inp=Int(2), _return_pid=True) + r2, pid2 = run(simple_cached_wf, inp=Int(2), _return_pid=True, _fast_forward=True) self.assertEqual(pid, pid2) + self.assertEqual(r, r2) diff --git a/aiida/work/process.py b/aiida/work/process.py index 3b44e1eb88..a2466e7017 100644 --- a/aiida/work/process.py +++ b/aiida/work/process.py @@ -18,6 +18,7 @@ import plum.process from plum.process_monitor import MONITOR import plum.process_monitor +from plum.error import FastForwardError import voluptuous from abc import ABCMeta @@ -315,16 +316,17 @@ def report(self, msg, *args, **kwargs): # del parsed[name] # return parsed - def run_until_complete(self): - with open('/home/greschd/Desktop/foo.txt', 'a') as f: - f.write(str(type(self).__mro__) + '\n') - super(Process, self).run_until_complete() - def _create_and_setup_db_record(self): self._calc = self.create_db_record() self._setup_db_record() if self.inputs._store_provenance: - self.calc.store_all(use_cache=caching.defaults.use_cache) + self.calc.store_all(use_cache=self._fast_forward_enabled()) + if self.calc.has_finished_ok(): + self._state = plum.process.ProcessState.FINISHED + for name, value in self.calc.get_outputs_dict(link_type=LinkType.RETURN).items(): + if name.endswith('_{pk}'.format(pk=value.pk)): + continue + self.out(name, value) if self.calc.pk is not None: return self.calc.pk @@ -386,14 +388,10 @@ def _setup_db_record(self): if '_label' in self.raw_inputs: self.calc.label = self.raw_inputs._label - def _can_fast_forward(self, inputs): - return False - - def _fast_forward(self): - node = None # Here we should find the old node - for k, v in node.get_output_dict(): - self.out(k, v) - + def _fast_forward_enabled(self): + # First priority: 
inputs + return self._parsed_inputs.get('_fast_forward', False) + # TODO: Add process and global level settings class FunctionProcess(Process): _func_args = None From c4027ff62f5a31f4f46aac6d862d243d06aafdf0 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Mon, 14 Aug 2017 17:14:51 +0200 Subject: [PATCH 031/101] Add inputs hash for WorkCalculation Nodes --- aiida/backends/tests/work/workfunction.py | 35 ++++++++++++++++--- .../general/calculation/work.py | 18 ++++++++++ 2 files changed, 48 insertions(+), 5 deletions(-) diff --git a/aiida/backends/tests/work/workfunction.py b/aiida/backends/tests/work/workfunction.py index 80ac15d623..78770c64bd 100644 --- a/aiida/backends/tests/work/workfunction.py +++ b/aiida/backends/tests/work/workfunction.py @@ -52,13 +52,38 @@ def test_run(self): self.assertTrue(run(simple_wf)['result']) self.assertTrue(run(return_input, get_true_node())['result']) + def test_hashes(self): + _, pid1 = run(return_input, inp=Int(2), _return_pid=True) + _, pid2 = run(return_input, inp=Int(2), _return_pid=True) + w1 = load_node(pid1) + w2 = load_node(pid2) + self.assertEqual(w1.get_hash(), w2.get_hash()) + + def test_hashes_different(self): + _, pid1 = run(return_input, inp=Int(2), _return_pid=True) + _, pid2 = run(return_input, inp=Int(3), _return_pid=True) + w1 = load_node(pid1) + w2 = load_node(pid2) + self.assertNotEqual(w1.get_hash(), w2.get_hash()) + def test_caching(self): + # Creating a new workfunction to avoid getting other results. + @workfunction + def simple_cached_wf(inp): + return {'result': inp} + + r, pid = run(simple_cached_wf, inp=Int(2), _return_pid=True) + r2, pid2 = run(simple_cached_wf, inp=Int(2), _return_pid=True, _fast_forward=True) + self.assertEqual(r, r2) + self.assertEqual(pid, pid2) + + def test_caching_different(self): + # Creating a new workfunction to avoid getting other results. 
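The tests above exercise the input-aware hash that this patch gives WorkCalculation: the node's own hash is combined with the hashes of its input nodes, so runs with different inputs hash differently (the work.py hunk follows below). A standalone approximation of the combination step, with hashlib standing in for aiida.common.hashing.make_hash and an ignore list that a later patch fills with the bookkeeping keys:

    import hashlib

    def combine_hashes(base_hash, input_hashes,
                       ignored=('_return_pid', '_fast_forward')):
        parts = [base_hash] + [
            '{0}={1}'.format(key, input_hashes[key])
            for key in sorted(input_hashes)
            if key not in ignored
        ]
        return hashlib.sha224(';'.join(parts).encode('utf-8')).hexdigest()

    h1 = combine_hashes('base', {'inp': 'aaa', '_return_pid': 'xyz'})
    h2 = combine_hashes('base', {'inp': 'aaa'})
    assert h1 == h2                                      # bookkeeping keys are ignored
    assert h1 != combine_hashes('base', {'inp': 'bbb'})  # real inputs are not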
@workfunction def simple_cached_wf(inp): return {'result': inp} - with caching.EnableCaching(): - r, pid = run(simple_cached_wf, inp=Int(2), _return_pid=True) - r2, pid2 = run(simple_cached_wf, inp=Int(2), _return_pid=True, _fast_forward=True) - self.assertEqual(pid, pid2) - self.assertEqual(r, r2) + r, pid = run(simple_cached_wf, inp=Int(2), _return_pid=True) + r2, pid2 = run(simple_cached_wf, inp=Int(3), _return_pid=True, _fast_forward=True) + self.assertNotEqual(r, r2) + self.assertNotEqual(pid, pid2) diff --git a/aiida/orm/implementation/general/calculation/work.py b/aiida/orm/implementation/general/calculation/work.py index 3253a7a31c..f75b91d11d 100644 --- a/aiida/orm/implementation/general/calculation/work.py +++ b/aiida/orm/implementation/general/calculation/work.py @@ -19,6 +19,8 @@ class WorkCalculation(Calculation): FINISHED_KEY = '_finished' FAILED_KEY = '_failed' + _hash_ignored_inputs = [] + @override def has_finished_ok(self): """ @@ -40,3 +42,19 @@ def has_failed(self): :rtype: bool """ return self.get_attr(self.FAILED_KEY, False) is not False + + def get_hash(self): + from aiida.common.hashing import make_hash + base_hash = super(WorkCalculation, self).get_hash() + if base_hash is None: + return None + try: + return make_hash([ + base_hash, + { + key: value.get_hash() for key, value in self.get_inputs_dict().items() + if key not in self._hash_ignored_inputs + } + ]) + except: + return None From 66931525fd15a255e2eb6659231e7a9317233b8f Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Mon, 14 Aug 2017 23:06:44 +0200 Subject: [PATCH 032/101] Add tests for WorkChain fast-forwarding --- aiida/backends/tests/work/workChain.py | 62 +++++++++++++++++++ .../general/calculation/work.py | 3 +- 2 files changed, 64 insertions(+), 1 deletion(-) diff --git a/aiida/backends/tests/work/workChain.py b/aiida/backends/tests/work/workChain.py index 60bfd576e9..b9e6b22eb2 100644 --- a/aiida/backends/tests/work/workChain.py +++ b/aiida/backends/tests/work/workChain.py @@ -12,6 +12,7 @@ import unittest import aiida.backends.settings as settings +from aiida.orm import load_node from aiida.backends.testbase import AiidaTestCase from plum.engine.ticking import TickingEngine import plum.process_monitor @@ -338,6 +339,67 @@ def _run_with_checkpoints(self, wf_class, inputs=None): return finished_steps +class TestFastForwardingWorkChain(TestWorkchain): + def setUp(self): + super(TestFastForwardingWorkChain, self).setUp() + class ReturnInputsFastForward(WorkChain): + @classmethod + def define(cls, spec): + super(ReturnInputsFastForward, cls).define(spec) + spec.input('a', valid_type=Int) + spec.input('b', valid_type=Int, default=Int(2), required=False) + spec.outline(cls.return_inputs) + spec.deterministic() + + def return_inputs(self): + self.out('a', self.inputs.a) + self.out('b', self.inputs.b) + self.wf_class = ReturnInputsFastForward + self.reference_result, self.reference_pid = run( + self.wf_class, a=Int(1), b=Int(2), _return_pid=True + ) + self.reference_wc = load_node(self.reference_pid) + + def tearDown(self): + super(TestFastForwardingWorkChain, self).tearDown() + self.clean_db() + + def test_hash(self): + res, pid = run( + self.wf_class, + a=Int(1), b=Int(2), + _fast_forward=True, _return_pid=True + ) + wc = load_node(pid) + self.assertEquals(wc.get_hash(), self.reference_wc.get_hash()) + self.assertNotEquals(wc.get_hash(), None) + + def test_fastforwarding(self): + res, pid = run( + self.wf_class, + a=Int(1), b=Int(2), + _fast_forward=True, _return_pid=True + ) + self.assertEquals(pid, 
self.reference_pid) + + def test_fastforwarding_default(self): + res, pid = run( + self.wf_class, + a=Int(1), + _fast_forward=True, _return_pid=True + ) + self.assertEquals(pid, self.reference_pid) + self.assertEquals(res, self.reference_result) + + def test_fastforwarding_different(self): + res, pid = run( + self.wf_class, + a=Int(2), b=Int(1), + _fast_forward=True, _return_pid=True + ) + self.assertNotEquals(pid, self.reference_pid) + self.assertNotEquals(res, self.reference_result) + class TestWorkchainWithOldWorkflows(AiidaTestCase): def test_call_old_wf(self): diff --git a/aiida/orm/implementation/general/calculation/work.py b/aiida/orm/implementation/general/calculation/work.py index f75b91d11d..677e0fd696 100644 --- a/aiida/orm/implementation/general/calculation/work.py +++ b/aiida/orm/implementation/general/calculation/work.py @@ -19,7 +19,8 @@ class WorkCalculation(Calculation): FINISHED_KEY = '_finished' FAILED_KEY = '_failed' - _hash_ignored_inputs = [] + _hash_ignored_inputs = ['_return_pid', '_fast_forward'] + _hash_ignored_attributes = ['_finished', '_sealed'] @override def has_finished_ok(self): From 8e56a54b601151a2e6852ecb5a2ba274484f6c40 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Tue, 15 Aug 2017 09:15:45 +0200 Subject: [PATCH 033/101] Add more tests to WorkChain forwarding --- aiida/backends/tests/work/workChain.py | 47 ++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/aiida/backends/tests/work/workChain.py b/aiida/backends/tests/work/workChain.py index b9e6b22eb2..165ec81172 100644 --- a/aiida/backends/tests/work/workChain.py +++ b/aiida/backends/tests/work/workChain.py @@ -360,6 +360,20 @@ def return_inputs(self): ) self.reference_wc = load_node(self.reference_pid) + class ReturnInputsFastForward(WorkChain): + @classmethod + def define(cls, spec): + super(ReturnInputsFastForward, cls).define(spec) + spec.input('a', valid_type=Int) + spec.input('b', valid_type=Int, default=Int(2), required=False) + spec.outline(cls.return_inputs) + spec.deterministic() + + def return_inputs(self): + raise ValueError + + self.wf_class_broken = ReturnInputsFastForward + def tearDown(self): super(TestFastForwardingWorkChain, self).tearDown() self.clean_db() @@ -381,6 +395,39 @@ def test_fastforwarding(self): _fast_forward=True, _return_pid=True ) self.assertEquals(pid, self.reference_pid) + self.assertEquals(res, self.reference_result) + + def test_fastforwarding_notexecuted(self): + res, pid = run( + self.wf_class_broken, + a=Int(1), b=Int(2), + _fast_forward=True, _return_pid=True + ) + self.assertEquals(pid, self.reference_pid) + self.assertEquals(res, self.reference_result) + + def test_fastforwarding_notexecuted_testworks(self): + self.assertRaises( + ValueError, + run, + self.wf_class_broken, + a=Int(1), b=Int(2), + _fast_forward=False, _return_pid=True + ) + + def test_fastforwarding_twice(self): + res1, pid1 = run( + self.wf_class, + a=Int(1), b=Int(2), + _fast_forward=True, _return_pid=True + ) + res2, pid2 = run( + self.wf_class, + a=Int(1), b=Int(2), + _fast_forward=True, _return_pid=True + ) + self.assertEquals(pid1, pid2) + self.assertEquals(res1, res2) def test_fastforwarding_default(self): res, pid = run( From e7493c70a0a1f9b04636c298a7604f826a256c74 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Tue, 15 Aug 2017 09:54:02 +0200 Subject: [PATCH 034/101] Add Travis hack to install plum from my fork --- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index 5d6977edce..4549f6f9d1 100644 --- 
a/.travis.yml +++ b/.travis.yml @@ -51,6 +51,8 @@ install: - pip install -U pip wheel setuptools # Install AiiDA with some optional dependencies - pip install .[REST,docs,atomic_tools,testing] + # Unreleased plum version required for fast-forwarding + - pip install git+https://bitbucket.org/greschd/plum@edge before_script: # Here I create the actual DB for submission From ae485aca89ed007affaa74683a0532e93cc5df97 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Tue, 15 Aug 2017 10:21:48 +0200 Subject: [PATCH 035/101] Add insert_data to fast-forwarding workchain test setUp method --- aiida/backends/tests/work/workChain.py | 1 + 1 file changed, 1 insertion(+) diff --git a/aiida/backends/tests/work/workChain.py b/aiida/backends/tests/work/workChain.py index 165ec81172..ae70c6e109 100644 --- a/aiida/backends/tests/work/workChain.py +++ b/aiida/backends/tests/work/workChain.py @@ -342,6 +342,7 @@ def _run_with_checkpoints(self, wf_class, inputs=None): class TestFastForwardingWorkChain(TestWorkchain): def setUp(self): super(TestFastForwardingWorkChain, self).setUp() + self.insert_data() class ReturnInputsFastForward(WorkChain): @classmethod def define(cls, spec): From fb8367e3a19b15d0ab5ad6ca6b9106a1e71e19bc Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Tue, 15 Aug 2017 10:43:13 +0200 Subject: [PATCH 036/101] Call tearDownClass and setUpClass in fast-forwarding tearDown --- aiida/backends/tests/work/workChain.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aiida/backends/tests/work/workChain.py b/aiida/backends/tests/work/workChain.py index ae70c6e109..638dddd628 100644 --- a/aiida/backends/tests/work/workChain.py +++ b/aiida/backends/tests/work/workChain.py @@ -342,7 +342,6 @@ def _run_with_checkpoints(self, wf_class, inputs=None): class TestFastForwardingWorkChain(TestWorkchain): def setUp(self): super(TestFastForwardingWorkChain, self).setUp() - self.insert_data() class ReturnInputsFastForward(WorkChain): @classmethod def define(cls, spec): @@ -377,7 +376,8 @@ def return_inputs(self): def tearDown(self): super(TestFastForwardingWorkChain, self).tearDown() - self.clean_db() + super(TestFastForwardingWorkChain, self).tearDownClass() + super(TestFastForwardingWorkChain, self).setUpClass() def test_hash(self): res, pid = run( From 33e6d0d8104d86e210908f0d424800ce4083e752 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Wed, 2 Aug 2017 17:23:34 +0200 Subject: [PATCH 037/101] Add _is_valid_cache method to exclude invalid Nodes (e.g. 
failed Calculations) --- aiida/orm/implementation/general/calculation/work.py | 6 ++++++ aiida/orm/implementation/general/node.py | 11 +++++++---- aiida/work/process.py | 1 + 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/aiida/orm/implementation/general/calculation/work.py b/aiida/orm/implementation/general/calculation/work.py index 677e0fd696..c0cfd09290 100644 --- a/aiida/orm/implementation/general/calculation/work.py +++ b/aiida/orm/implementation/general/calculation/work.py @@ -10,6 +10,7 @@ from aiida.orm.implementation.calculation import Calculation from aiida.common.lang import override +from aiida.common import caching class WorkCalculation(Calculation): """ @@ -18,6 +19,8 @@ class WorkCalculation(Calculation): """ FINISHED_KEY = '_finished' FAILED_KEY = '_failed' + _hash_ignored_attributes = [FINISHED_KEY] + _use_cache = caching.defaults.use_cache _hash_ignored_inputs = ['_return_pid', '_fast_forward'] _hash_ignored_attributes = ['_finished', '_sealed'] @@ -59,3 +62,6 @@ def get_hash(self): ]) except: return None + + def _is_valid_cache(self): + return super(WorkCalculation, self)._is_valid_cache() and self.has_finished_ok diff --git a/aiida/orm/implementation/general/node.py b/aiida/orm/implementation/general/node.py index 87a372e81c..43dac82124 100644 --- a/aiida/orm/implementation/general/node.py +++ b/aiida/orm/implementation/general/node.py @@ -151,9 +151,6 @@ def __new__(cls, name, bases, attrs): # A list of attribute names that will be ignored when creating the hash. _hash_ignored_attributes = [] - # An attribute / property that defines whether the node should be used as cache - _is_valid_cache = True - def get_desc(self): """ Returns a string with infos retrieved from a node's @@ -1518,10 +1515,16 @@ def get_same_node(self): qb = QueryBuilder() qb.append(self.__class__, filters={'extras.hash':hash_}, project='*', subclassing=False) for same_node, in qb.iterall(): - if same_node._is_valid_cache: + if same_node._is_valid_cache(): return same_node return None + def _is_valid_cache(self): + """ + Subclass hook to exclude certain Nodes (e.g. failed calculations) from being considered in the caching process. 
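A standalone sketch of the hook pattern being introduced here: the base class accepts any node as a cache source, while calculation-like subclasses chain up and additionally demand a successful run (the class names mirror, but are not, the AiiDA classes):

    class Node(object):
        def _is_valid_cache(self):
            return True

    class Calculation(Node):
        def __init__(self, finished_ok):
            self._finished_ok = finished_ok

        def has_finished_ok(self):
            return self._finished_ok

        def _is_valid_cache(self):
            # Chain up, then add the subclass-specific condition.
            return (super(Calculation, self)._is_valid_cache()
                    and self.has_finished_ok())

    assert Calculation(True)._is_valid_cache()
    assert not Calculation(False)._is_valid_cache()  # failed runs are never reused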
+ """ + return True + @property def out(self): """ diff --git a/aiida/work/process.py b/aiida/work/process.py index a2466e7017..0fdcbf68c2 100644 --- a/aiida/work/process.py +++ b/aiida/work/process.py @@ -35,6 +35,7 @@ from aiida.orm.data.parameter import ParameterData from aiida.orm.calculation.work import WorkCalculation from aiida import LOG_LEVEL_REPORT +from aiida.common import caching From 404a6f6b207e1ffbddaa0977b6b71d3008a6737a Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Tue, 15 Aug 2017 17:44:03 +0200 Subject: [PATCH 038/101] Remove unused _use_cache attribute --- aiida/orm/implementation/general/calculation/work.py | 1 - 1 file changed, 1 deletion(-) diff --git a/aiida/orm/implementation/general/calculation/work.py b/aiida/orm/implementation/general/calculation/work.py index c0cfd09290..0fd686a828 100644 --- a/aiida/orm/implementation/general/calculation/work.py +++ b/aiida/orm/implementation/general/calculation/work.py @@ -20,7 +20,6 @@ class WorkCalculation(Calculation): FINISHED_KEY = '_finished' FAILED_KEY = '_failed' _hash_ignored_attributes = [FINISHED_KEY] - _use_cache = caching.defaults.use_cache _hash_ignored_inputs = ['_return_pid', '_fast_forward'] _hash_ignored_attributes = ['_finished', '_sealed'] From 471dcfa03f8111c48d75f49435268304a0b72a25 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Wed, 16 Aug 2017 10:13:36 +0200 Subject: [PATCH 039/101] Add 'ignore_errors' option to get_hash, raise errors instead of returning None if 'ignore_errors=False' --- .../orm/implementation/general/calculation/work.py | 14 ++++++++++---- aiida/orm/implementation/general/node.py | 9 ++++++--- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/aiida/orm/implementation/general/calculation/work.py b/aiida/orm/implementation/general/calculation/work.py index 0fd686a828..2e5d295639 100644 --- a/aiida/orm/implementation/general/calculation/work.py +++ b/aiida/orm/implementation/general/calculation/work.py @@ -46,9 +46,12 @@ def has_failed(self): """ return self.get_attr(self.FAILED_KEY, False) is not False - def get_hash(self): + def _is_valid_cache(self): + return super(WorkCalculation, self)._is_valid_cache() and self.has_finished_ok + + def get_hash(self, ignore_errors=True): from aiida.common.hashing import make_hash - base_hash = super(WorkCalculation, self).get_hash() + base_hash = super(WorkCalculation, self).get_hash(ignore_errors=ignore_errors) if base_hash is None: return None try: @@ -59,8 +62,11 @@ def get_hash(self): if key not in self._hash_ignored_inputs } ]) - except: - return None + except Exception as e: + if ignore_errors: + return None + else: + raise e def _is_valid_cache(self): return super(WorkCalculation, self)._is_valid_cache() and self.has_finished_ok diff --git a/aiida/orm/implementation/general/node.py b/aiida/orm/implementation/general/node.py index fcde60ccc8..d35c176600 100644 --- a/aiida/orm/implementation/general/node.py +++ b/aiida/orm/implementation/general/node.py @@ -1479,7 +1479,7 @@ def __del__(self): self._temp_folder.erase() - def get_hash(self): + def get_hash(self, ignore_errors=True): """ Making a hash based on my attributes """ @@ -1495,8 +1495,11 @@ def get_hash(self): }, self.folder ]) - except: - return None + except Exception as e: + if ignore_errors: + return None + else: + raise e def get_same_node(self): from aiida.orm.querybuilder import QueryBuilder From f076bfe15c90d30df645b9a12beb5d5f53cbf32e Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Wed, 16 Aug 2017 13:20:09 +0200 Subject: [PATCH 040/101] Add 
fast-forwarding for JobCalculations --- aiida/orm/implementation/general/calculation/job/__init__.py | 4 +++- aiida/orm/implementation/general/calculation/work.py | 3 --- aiida/work/process.py | 4 ++++ 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/aiida/orm/implementation/general/calculation/job/__init__.py b/aiida/orm/implementation/general/calculation/job/__init__.py index fce5295522..f0a9bc59a4 100644 --- a/aiida/orm/implementation/general/calculation/job/__init__.py +++ b/aiida/orm/implementation/general/calculation/job/__init__.py @@ -102,7 +102,9 @@ def store(self, *args, **kwargs): super(AbstractJobCalculation, self).store(*args, **kwargs) # I get here if the calculation was successfully stored. - self._set_state(calc_states.NEW) + # Set to new only if it is not already FINISHED (due to caching) + if not self.get_state() == calc_states.FINISHED: + self._set_state(calc_states.NEW) # Important to return self to allow the one-liner # c = Calculation().store() diff --git a/aiida/orm/implementation/general/calculation/work.py b/aiida/orm/implementation/general/calculation/work.py index 2e5d295639..b0c284aa18 100644 --- a/aiida/orm/implementation/general/calculation/work.py +++ b/aiida/orm/implementation/general/calculation/work.py @@ -67,6 +67,3 @@ def get_hash(self, ignore_errors=True): return None else: raise e - - def _is_valid_cache(self): - return super(WorkCalculation, self)._is_valid_cache() and self.has_finished_ok diff --git a/aiida/work/process.py b/aiida/work/process.py index 752aad6ae7..9756260ff0 100644 --- a/aiida/work/process.py +++ b/aiida/work/process.py @@ -328,6 +328,10 @@ def _create_and_setup_db_record(self): if name.endswith('_{pk}'.format(pk=value.pk)): continue self.out(name, value) + # This is needed for JobProcess. In that case, the outputs are + # returned regardless of whether they end in '_pk' + for name, value in self.calc.get_outputs_dict(link_type=LinkType.CREATE).items(): + self.out(name, value) if self.calc.pk is not None: return self.calc.pk From ba052a5e555fc42c89899c51f097f226b019cc87 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Wed, 16 Aug 2017 14:59:15 +0200 Subject: [PATCH 041/101] Implement _is_valid_cache at AbstractCalculation level, add abstractmethod for has_finished_ok --- .../implementation/general/calculation/__init__.py | 11 +++++++++++ aiida/orm/implementation/general/calculation/work.py | 3 --- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/aiida/orm/implementation/general/calculation/__init__.py b/aiida/orm/implementation/general/calculation/__init__.py index c20f37b0d9..5ddd67bd93 100644 --- a/aiida/orm/implementation/general/calculation/__init__.py +++ b/aiida/orm/implementation/general/calculation/__init__.py @@ -8,6 +8,7 @@ # For further information please visit http://www.aiida.net # ########################################################################### +import abc import collections from aiida.common.utils import classproperty @@ -324,3 +325,13 @@ def get_code(self): from aiida.orm.code import Code return dict(self.get_inputs(node_type=Code, also_labels=True)).get( self._use_methods['code']['linkname'], None) + + @abc.abstractmethod + def has_finished_ok(self): + """ + Returns whether the Calculation has finished successfully. 
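The ignore_errors flag from PATCH 039 (two patches above) makes hashing failures non-fatal by default: an unhashable node simply yields None, meaning no cache hit is possible, while callers can still ask for the exception. A minimal standalone version of the pattern, with a toy make_hash:

    import hashlib

    def make_hash(obj):
        # Toy stand-in for aiida.common.hashing.make_hash.
        if isinstance(obj, (int, float, str)):
            return hashlib.sha224(repr(obj).encode('utf-8')).hexdigest()
        raise ValueError('cannot hash {0}'.format(type(obj)))

    def get_hash(obj, ignore_errors=True):
        try:
            return make_hash(obj)
        except Exception:
            if ignore_errors:
                return None
            raise

    assert get_hash(42) is not None
    assert get_hash(object()) is None  # swallowed, treated as "no cache match"
    # get_hash(object(), ignore_errors=False) would raise ValueError instead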
+ """ + raise NotImplementedError + + def _is_valid_cache(self): + return super(AbstractCalculation, self)._is_valid_cache() and self.has_finished_ok() diff --git a/aiida/orm/implementation/general/calculation/work.py b/aiida/orm/implementation/general/calculation/work.py index b0c284aa18..c34dd548be 100644 --- a/aiida/orm/implementation/general/calculation/work.py +++ b/aiida/orm/implementation/general/calculation/work.py @@ -46,9 +46,6 @@ def has_failed(self): """ return self.get_attr(self.FAILED_KEY, False) is not False - def _is_valid_cache(self): - return super(WorkCalculation, self)._is_valid_cache() and self.has_finished_ok - def get_hash(self, ignore_errors=True): from aiida.common.hashing import make_hash base_hash = super(WorkCalculation, self).get_hash(ignore_errors=ignore_errors) From 3b02796063822773eab968de8f753eb4df5a654a Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Thu, 17 Aug 2017 13:01:17 +0200 Subject: [PATCH 042/101] Add option to configure caching / fast-forwarding via config file --- aiida/backends/tests/nodes.py | 14 ------- aiida/common/_caching_defaults.py | 66 ------------------------------- aiida/common/caching.py | 66 ++++++++++++++++++++++++++++++- aiida/work/process.py | 12 ++++-- setup_requirements.py | 1 + 5 files changed, 74 insertions(+), 85 deletions(-) delete mode 100644 aiida/common/_caching_defaults.py diff --git a/aiida/backends/tests/nodes.py b/aiida/backends/tests/nodes.py index 2d1686acdc..fc55934863 100644 --- a/aiida/backends/tests/nodes.py +++ b/aiida/backends/tests/nodes.py @@ -45,20 +45,6 @@ def test_simple_equal_nodes(self): self.assertEqual(n1.uuid, n2.uuid) self.assertEqual(n1.folder.get_abs_path('.'), n2.folder.get_abs_path('.')) - def test_set_default(self): - attributes = [ - (1.0, 2, 3), - ({'a': 'b', 'c': 'd'}, [1, 2, 3], {4, 2, 2}) - ] - for attr in attributes: - n1 = self.create_simple_node(*attr) - n2 = self.create_simple_node(*attr) - with caching.EnableCaching(): - n1.store(use_cache=caching.defaults.use_cache) - n2.store(use_cache=caching.defaults.use_cache) - self.assertEqual(n1.uuid, n2.uuid) - self.assertEqual(n1.folder.get_abs_path('.'), n2.folder.get_abs_path('.')) - @staticmethod def create_folderdata_with_empty_file(): from aiida.orm.data.folder import FolderData diff --git a/aiida/common/_caching_defaults.py b/aiida/common/_caching_defaults.py deleted file mode 100644 index 50765d19c6..0000000000 --- a/aiida/common/_caching_defaults.py +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -""" -Defines the default values for using the caching mechanism. The values are mutable, meaning they have the following behavior: - -.. code :: python - >>> x = defaults.use_cache - >>> print(x) - False - >>> defaults.use_cache = True - >>> print(x) - True -""" - -__all__ = ['defaults', 'EnableCaching'] - -class _MutableBool(object): - def __init__(self, value): - self.value = value - - def get(self): - return self.value - - def set(self, value): - self.value = value - - def __nonzero__(self): - return self.__bool__() - - def __bool__(self): - return self.value - - def __str__(self): - return str(bool(self)) - -class _Defaults(object): - """ - A class containing the default values for caching. 
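Given the DEFAULT_CONFIG structure and the per-profile lookup in _get_config (both in the new caching module shown below), a cache_config.yml would plausibly look as follows; the profile name and the class names are hypothetical:

    default_profile:
        use_cache:
            default: true
        fast_forward:
            default: false
            enabled:
                - MyWorkChain
            disabled:
                - FlakyWorkChain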
- """ - def __init__(self): - self._use_cache = _MutableBool(False) - - @property - def use_cache(self): - return self._use_cache - - @use_cache.setter - def use_cache(self, value): - self._use_cache.set(bool(value)) - -defaults = _Defaults() - -class EnableCaching(object): - """ - Context manager to temporarily set the caching default. The previous value will be restored upon exiting. - """ - def __init__(self, value=True): - self.value = value - - def __enter__(self): - self.previous_value = bool(defaults.use_cache) - defaults.use_cache = self.value - - def __exit__(self, exc_type, exc_val, exc_tb): - defaults.use_cache = self.previous_value diff --git a/aiida/common/caching.py b/aiida/common/caching.py index 56ea833d23..ef320abf24 100644 --- a/aiida/common/caching.py +++ b/aiida/common/caching.py @@ -1,4 +1,68 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -from ._caching_defaults import defaults, EnableCaching +import os +import yaml +try: + from collections import ChainMap +except ImportError: + from chainmap import ChainMap + +from aiida.common.extendeddicts import Enumerate +from aiida.common.setup import AIIDA_CONFIG_FOLDER +from aiida.backends.utils import get_current_profile + +__all__ = ['CONFIG'] + +config_keys = Enumerate(( + 'use_cache', 'fast_forward', 'default', 'enabled', 'disabled' +)) + +DEFAULT_CONFIG = { + config_keys.use_cache: {config_keys.default: False}, + config_keys.fast_forward: { + config_keys.default: False, + config_keys.enabled: [], + config_keys.disabled: [], + } +} + +def _get_config(): + try: + with open(os.path.join(AIIDA_CONFIG_FOLDER, 'cache_config.yml'), 'r') as f: + config = yaml.load(f)[get_current_profile()] + # validate configuration + for key, value in config.items(): + error_msg = "Configuration error: Invalid key '{}' in cache_config.yml" + if key not in DEFAULT_CONFIG: + raise ValueError(error_msg.format(key)) + for sub_key in value: + if sub_key not in DEFAULT_CONFIG[key]: + raise ValueError(error_msg.format(sub_key)) + + # add defaults where config is missing + for key, default_config in DEFAULT_CONFIG.items(): + config[key] = ChainMap(config.get(key, {}), default_config) + return config + + # no config file, or no config for this profile + except (OSError, IOError, KeyError): + return DEFAULT_CONFIG + +CONFIG = _get_config() + +def get_use_cache_default(): + return CONFIG[config_keys.use_cache][config_keys.default] + +def get_fast_forward_enabled(class_name): + fast_forward_config = CONFIG[config_keys.fast_forward] + enabled = class_name in fast_forward_config[config_keys.enabled] + disabled = class_name in fast_forward_config[config_keys.disabled] + if enabled and disabled: + raise ValueError('Invalid configuration: Fast-forwarding for {} is both enabled and disabled.'.format(class_name)) + elif enabled: + return True + elif disabled: + return False + else: + return fast_forward_config[config_keys.default] diff --git a/aiida/work/process.py b/aiida/work/process.py index 9756260ff0..34c09d8764 100644 --- a/aiida/work/process.py +++ b/aiida/work/process.py @@ -35,8 +35,6 @@ from aiida.orm.data.parameter import ParameterData from aiida.orm.calculation.work import WorkCalculation from aiida import LOG_LEVEL_REPORT -from aiida.common import caching - class DictSchema(object): @@ -400,8 +398,14 @@ def _add_description_and_label(self): def _fast_forward_enabled(self): # First priority: inputs - return self._parsed_inputs.get('_fast_forward', False) - # TODO: Add process and global level settings + try: + return 
self._parsed_inputs['_fast_forward'] + # Second priority: config + except KeyError: + return ( + caching.get_fast_forward_enabled(type(self).__name__) or + caching.get_fast_forward_enabled(type(self._calc).__name__) + ) class FunctionProcess(Process): _func_args = None diff --git a/setup_requirements.py b/setup_requirements.py index 2f68d9e49e..e16c45dcd9 100644 --- a/setup_requirements.py +++ b/setup_requirements.py @@ -23,6 +23,7 @@ 'future', 'pathlib2', 'singledispatch >= 3.4.0.0', + 'pyyaml', # We need for the time being to stay with an old version # of celery, including the versions of the AMQP libraries below, # because the support for a SQLA broker has been dropped in later From 509b7e8e83f72c172dadea3b003abe0350ba150e Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Thu, 17 Aug 2017 14:15:40 +0200 Subject: [PATCH 043/101] Set Py2 requirements via environment marker --- aiida/common/hashing.py | 1 - setup_requirements.py | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/aiida/common/hashing.py b/aiida/common/hashing.py index d9bd56a65d..dfc33fe940 100644 --- a/aiida/common/hashing.py +++ b/aiida/common/hashing.py @@ -14,7 +14,6 @@ from datetime import datetime import numbers try: # Python3 - import pathlib from functools import singledispatch from collections import abc except ImportError: # Python2 diff --git a/setup_requirements.py b/setup_requirements.py index e16c45dcd9..8ea96a5090 100644 --- a/setup_requirements.py +++ b/setup_requirements.py @@ -21,8 +21,6 @@ 'pytz==2014.10', 'six==1.10', 'future', - 'pathlib2', - 'singledispatch >= 3.4.0.0', 'pyyaml', # We need for the time being to stay with an old version # of celery, including the versions of the AMQP libraries below, @@ -64,6 +62,8 @@ ] extras_require = { + # Requirements for Python 2 only + ':python_version < "3"': ['chainmap', 'pathlib2', 'singledispatch >= 3.4.0.0'], # Requirements for ssh transport with authentification through Kerberos # token # N. B.: you need to install first libffi and MIT kerberos GSSAPI including header files. 
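The _fast_forward_enabled resolution above is two-tiered: an explicit _fast_forward input always wins, and only when it is absent does the per-class configuration (checked for both the process class and its calculation class) decide. Schematically, with a callable standing in for caching.get_fast_forward_enabled:

    def fast_forward_enabled(parsed_inputs, config_lookup, process_name, calc_name):
        try:
            return parsed_inputs['_fast_forward']   # first priority: inputs
        except KeyError:                            # second priority: config file
            return config_lookup(process_name) or config_lookup(calc_name)

    lookup = lambda name: name == 'SomeCalculation'
    assert fast_forward_enabled({'_fast_forward': False}, lookup,
                                'SomeProcess', 'SomeCalculation') is False
    assert fast_forward_enabled({}, lookup,
                                'SomeProcess', 'SomeCalculation') is True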
From b79fe101a94b451adf12e74fe353dd0c2054d4ff Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Thu, 17 Aug 2017 15:33:21 +0200 Subject: [PATCH 044/101] Add _get_objects_to_hash method, to de-duplicate get_hash code --- .../general/calculation/work.py | 25 ++++++------------- aiida/orm/implementation/general/node.py | 23 +++++++++-------- 2 files changed, 20 insertions(+), 28 deletions(-) diff --git a/aiida/orm/implementation/general/calculation/work.py b/aiida/orm/implementation/general/calculation/work.py index c34dd548be..4d2ec00c61 100644 --- a/aiida/orm/implementation/general/calculation/work.py +++ b/aiida/orm/implementation/general/calculation/work.py @@ -46,21 +46,10 @@ def has_failed(self): """ return self.get_attr(self.FAILED_KEY, False) is not False - def get_hash(self, ignore_errors=True): - from aiida.common.hashing import make_hash - base_hash = super(WorkCalculation, self).get_hash(ignore_errors=ignore_errors) - if base_hash is None: - return None - try: - return make_hash([ - base_hash, - { - key: value.get_hash() for key, value in self.get_inputs_dict().items() - if key not in self._hash_ignored_inputs - } - ]) - except Exception as e: - if ignore_errors: - return None - else: - raise e + def _get_objects_to_hash(self): + res = super(WorkCalculation, self)._get_objects_to_hash() + res.append({ + key: value.get_hash() for key, value in self.get_inputs_dict().items() + if key not in self._hash_ignored_inputs + }) + return res diff --git a/aiida/orm/implementation/general/node.py b/aiida/orm/implementation/general/node.py index d35c176600..062a2f4e41 100644 --- a/aiida/orm/implementation/general/node.py +++ b/aiida/orm/implementation/general/node.py @@ -1485,22 +1485,25 @@ def get_hash(self, ignore_errors=True): """ from aiida.common.hashing import make_hash try: - return make_hash([ - importlib.import_module( - self.__module__.split('.', 1)[0] - ).__version__, - { - key: val for key, val in self.get_attrs().items() - if key not in self._hash_ignored_attributes - }, - self.folder - ]) + return make_hash(self._get_objects_to_hash()) except Exception as e: if ignore_errors: return None else: raise e + def _get_objects_to_hash(self): + return [ + importlib.import_module( + self.__module__.split('.', 1)[0] + ).__version__, + { + key: val for key, val in self.get_attrs().items() + if key not in self._hash_ignored_attributes + }, + self.folder + ] + def get_same_node(self): from aiida.orm.querybuilder import QueryBuilder From f989e9252f80088d84f4f318d960b074aa1edf14 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Thu, 17 Aug 2017 23:55:33 +0200 Subject: [PATCH 045/101] Add 'expanduser' to getting the cache config --- aiida/common/caching.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiida/common/caching.py b/aiida/common/caching.py index ef320abf24..b062de52bd 100644 --- a/aiida/common/caching.py +++ b/aiida/common/caching.py @@ -29,7 +29,7 @@ def _get_config(): try: - with open(os.path.join(AIIDA_CONFIG_FOLDER, 'cache_config.yml'), 'r') as f: + with open(os.path.join(os.path.expanduser(AIIDA_CONFIG_FOLDER), 'cache_config.yml'), 'r') as f: config = yaml.load(f)[get_current_profile()] # validate configuration for key, value in config.items(): From d64add2f4f724a26ffe11120fe34f79a05651e8f Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Sun, 20 Aug 2017 22:39:37 +0200 Subject: [PATCH 046/101] - Add **kwargs to make_hash - ignore 'raw_inputs' subfolder when creating JobCalculation hash - add ignored attributes to JobCalculations --- 
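PATCH 044 above turns get_hash into a template method: the base class owns the make_hash call and its error handling, and subclasses only extend the list returned by _get_objects_to_hash. The shape of that pattern in isolation (the built-in hash stands in for make_hash):

    class Node(object):
        def get_hash(self):
            return hash(tuple(self._get_objects_to_hash()))

        def _get_objects_to_hash(self):
            return ['<version>', '<attributes>', '<folder>']

    class Calculation(Node):
        def _get_objects_to_hash(self):
            # Extend, rather than re-implement, the parent's list.
            res = super(Calculation, self)._get_objects_to_hash()
            res.append('<input hashes>')
            return res

    assert Node().get_hash() != Calculation().get_hash()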
aiida/backends/tests/work/workfunction.py | 8 +++ aiida/common/hashing.py | 49 ++++++++++--------- aiida/orm/calculation/job/__init__.py | 2 +- .../general/calculation/__init__.py | 17 +++++++ .../general/calculation/job/__init__.py | 30 +++++++++++- .../general/calculation/work.py | 18 +++---- aiida/orm/implementation/general/node.py | 4 +- 7 files changed, 89 insertions(+), 39 deletions(-) diff --git a/aiida/backends/tests/work/workfunction.py b/aiida/backends/tests/work/workfunction.py index 78770c64bd..fe5f8429b4 100644 --- a/aiida/backends/tests/work/workfunction.py +++ b/aiida/backends/tests/work/workfunction.py @@ -76,6 +76,8 @@ def simple_cached_wf(inp): r2, pid2 = run(simple_cached_wf, inp=Int(2), _return_pid=True, _fast_forward=True) self.assertEqual(r, r2) self.assertEqual(pid, pid2) + self._check_hash_consistent(pid) + self._check_hash_consistent(pid2) def test_caching_different(self): # Creating a new workfunction to avoid getting other results. @@ -87,3 +89,9 @@ def simple_cached_wf(inp): r2, pid2 = run(simple_cached_wf, inp=Int(3), _return_pid=True, _fast_forward=True) self.assertNotEqual(r, r2) self.assertNotEqual(pid, pid2) + self._check_hash_consistent(pid) + self._check_hash_consistent(pid2) + + def _check_hash_consistent(self, pid): + wc = load_node(pid) + self.assertEqual(wc.get_hash(), wc.get_extra('hash')) diff --git a/aiida/common/hashing.py b/aiida/common/hashing.py index dfc33fe940..c1bee463c6 100644 --- a/aiida/common/hashing.py +++ b/aiida/common/hashing.py @@ -124,7 +124,7 @@ def make_hash_with_type(type_chr, string_to_hash): return hashlib.sha224("{}{}".format(type_chr, string_to_hash)).hexdigest() @singledispatch -def make_hash(object_to_hash): +def make_hash(object_to_hash, **kwargs): """ Makes a hash from a dictionary, list, tuple or set to any level, that contains only other hashable or nonhashable types (including lists, tuples, sets, and @@ -197,77 +197,79 @@ def make_hash(object_to_hash): ) @make_hash.register(abc.Sequence) -def _(sequence): +def _(sequence, **kwargs): hashes = tuple([ - make_hash(x) for x in sequence + make_hash(x, **kwargs) for x in sequence ]) return make_hash_with_type('L', ",".join(hashes)) @make_hash.register(abc.Set) -def _(object_to_hash): +def _(object_to_hash, **kwargs): hashes = tuple([ - make_hash(x) + make_hash(x, **kwargs) for x in sorted(object_to_hash) ]) return make_hash_with_type('S', ",".join(hashes)) @make_hash.register(abc.Mapping) -def _(mapping): +def _(mapping, **kwargs): hashed_dictionary = { - k: make_hash(v) + k: make_hash(v, **kwargs) for k,v in mapping.items() } return make_hash_with_type( 'D', - make_hash(sorted(hashed_dictionary.items())) + make_hash(sorted(hashed_dictionary.items()), **kwargs) ) @make_hash.register(numbers.Real) -def _(object_to_hash): +def _(object_to_hash, **kwargs): return make_hash_with_type( 'f', truncate_float64(object_to_hash).tobytes() ) @make_hash.register(numbers.Complex) -def _(object_to_hash): +def _(object_to_hash, **kwargs): return make_hash_with_type( 'c', ','.join([ - make_hash(object_to_hash.real), - make_hash(object_to_hash.imag) + make_hash(object_to_hash.real, **kwargs), + make_hash(object_to_hash.imag, **kwargs) ]) ) @make_hash.register(numbers.Integral) -def _(object_to_hash): +def _(object_to_hash, **kwargs): return make_hash_with_type('i', str(object_to_hash)) @make_hash.register(basestring) -def _(object_to_hash): +def _(object_to_hash, **kwargs): return make_hash_with_type('s', object_to_hash) @make_hash.register(bool) -def _(object_to_hash): +def 
_(object_to_hash, **kwargs): return make_hash_with_type('b', str(object_to_hash)) @make_hash.register(type(None)) -def _(object_to_hash): +def _(object_to_hash, **kwargs): return make_hash_with_type('n', str(object_to_hash)) @make_hash.register(datetime) -def _(object_to_hash): +def _(object_to_hash, **kwargs): return make_hash_with_type('d', str(object_to_hash)) @make_hash.register(Folder) -def _(folder): +def _(folder, **kwargs): # make sure file is closed after being read def _read_file(folder, name): with folder.open(name) as f: return f.read() + ignored_folder_content = kwargs.get('ignored_folder_content', []) + return make_hash_with_type( 'pd', make_hash([ @@ -277,15 +279,16 @@ def _read_file(folder, name): make_hash_with_type('pf', _read_file(folder, name)) ) for name in sorted(folder.get_content_list()) - ]) + if name not in ignored_folder_content + ], **kwargs) ) @make_hash.register(np.ndarray) -def _(object_to_hash): +def _(object_to_hash, **kwargs): if object_to_hash.dtype == np.float64: return make_hash_with_type( 'af', - make_hash(truncate_array64(object_to_hash).tobytes()) + make_hash(truncate_array64(object_to_hash).tobytes(), **kwargs) ) elif object_to_hash.dtype == np.complex128: return make_hash_with_type( @@ -293,12 +296,12 @@ def _(object_to_hash): make_hash([ object_to_hash.real, object_to_hash.imag - ]) + ], **kwargs) ) else: return make_hash_with_type( 'ao', - make_hash(object_to_hash.tobytes()) + make_hash(object_to_hash.tobytes(), **kwargs) ) def truncate_float64(x, num_bits=4): diff --git a/aiida/orm/calculation/job/__init__.py b/aiida/orm/calculation/job/__init__.py index f9b769adf4..2d5e0317cb 100644 --- a/aiida/orm/calculation/job/__init__.py +++ b/aiida/orm/calculation/job/__init__.py @@ -18,7 +18,7 @@ class JobCalculation(_JC): def get_desc(self): """ - Returns a string with infos retrieved from a JobCalculation node's + Returns a string with infos retrieved from a JobCalculation node's properties. """ return self.get_state(from_attribute=True) diff --git a/aiida/orm/implementation/general/calculation/__init__.py b/aiida/orm/implementation/general/calculation/__init__.py index 5ddd67bd93..7a23240cc2 100644 --- a/aiida/orm/implementation/general/calculation/__init__.py +++ b/aiida/orm/implementation/general/calculation/__init__.py @@ -83,6 +83,15 @@ class AbstractCalculation(SealableWithUpdatableAttributes): calculations run via a scheduler. 
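With the **kwargs threading from this patch, an option such as ignored_folder_content travels from the top-level get_hash call through every recursive make_hash dispatch, but only the Folder handler acts on it. A minimal sketch of the mechanism with two handlers (functools.singledispatch; on Python 2 via the singledispatch backport):

    from functools import singledispatch

    @singledispatch
    def make_hash(obj, **kwargs):
        raise ValueError('unsupported type: {0}'.format(type(obj)))

    @make_hash.register(str)
    def _(obj, **kwargs):  # accepts, but ignores, the options
        return 's:' + obj

    @make_hash.register(list)
    def _(obj, **kwargs):
        ignored = kwargs.get('ignored_folder_content', [])
        # Forward kwargs unchanged into the recursion.
        return 'L:' + ','.join(make_hash(x, **kwargs)
                               for x in obj if x not in ignored)

    assert make_hash(['a', 'b'], ignored_folder_content=['b']) == 'L:s:a'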
""" + @classproperty + def _hash_ignored_attributes(cls): + return super(AbstractCalculation, cls)._hash_ignored_attributes + [ + '_sealed', + '_finished', + ] + + _hash_ignored_inputs = [] + # A tuple with attributes that can be updated even after # the call of the store() method @@ -335,3 +344,11 @@ def has_finished_ok(self): def _is_valid_cache(self): return super(AbstractCalculation, self)._is_valid_cache() and self.has_finished_ok() + + def _get_objects_to_hash(self): + res = super(AbstractCalculation, self)._get_objects_to_hash() + res.append({ + key: value.get_hash() for key, value in self.get_inputs_dict().items() + if key not in self._hash_ignored_inputs + }) + return res diff --git a/aiida/orm/implementation/general/calculation/job/__init__.py b/aiida/orm/implementation/general/calculation/job/__init__.py index f0a9bc59a4..c2be7a41c8 100644 --- a/aiida/orm/implementation/general/calculation/job/__init__.py +++ b/aiida/orm/implementation/general/calculation/job/__init__.py @@ -13,7 +13,7 @@ import copy from aiida.utils import timezone -from aiida.common.utils import str_timedelta +from aiida.common.utils import str_timedelta, classproperty from aiida.common.datastructures import calc_states from aiida.common.exceptions import ModificationNotAllowed, MissingPluginError from aiida.common.links import LinkType @@ -27,7 +27,6 @@ # 'rerunnable', # 'resourceLimits', - _input_subfolder = 'raw_input' @@ -37,6 +36,33 @@ class AbstractJobCalculation(object): remotely on a job scheduler. """ + @classproperty + def _hash_ignored_attributes(cls): + return super(AbstractJobCalculation, cls)._hash_ignored_attributes + [ + 'state', + 'scheduler_state', + 'scheduler_lastchecktime', + 'remote_workdir', + 'last_jobinfo', + 'job_id', + 'queue_name', + 'max_wallclock_seconds', + 'retrieve_list', + 'retrieve_singlefile_list', + ] + + def get_hash( + self, + ignore_errors=True, + ignored_folder_content=('raw_input',), + **kwargs + ): + return super(AbstractJobCalculation, self).get_hash( + ignore_errors=ignore_errors, + ignored_folder_content=ignored_folder_content, + **kwargs + ) + @classmethod def process(cls): from aiida.work.legacy.job_process import JobProcess diff --git a/aiida/orm/implementation/general/calculation/work.py b/aiida/orm/implementation/general/calculation/work.py index 4d2ec00c61..213a732a59 100644 --- a/aiida/orm/implementation/general/calculation/work.py +++ b/aiida/orm/implementation/general/calculation/work.py @@ -9,6 +9,7 @@ ########################################################################### from aiida.orm.implementation.calculation import Calculation +from aiida.common.utils import classproperty from aiida.common.lang import override from aiida.common import caching @@ -19,10 +20,13 @@ class WorkCalculation(Calculation): """ FINISHED_KEY = '_finished' FAILED_KEY = '_failed' - _hash_ignored_attributes = [FINISHED_KEY] - _hash_ignored_inputs = ['_return_pid', '_fast_forward'] - _hash_ignored_attributes = ['_finished', '_sealed'] + @classproperty + def _hash_ignored_inputs(cls): + return super(WorkCalculation, cls)._hash_ignored_inputs + [ + '_return_pid', + '_fast_forward' + ] @override def has_finished_ok(self): @@ -45,11 +49,3 @@ def has_failed(self): :rtype: bool """ return self.get_attr(self.FAILED_KEY, False) is not False - - def _get_objects_to_hash(self): - res = super(WorkCalculation, self)._get_objects_to_hash() - res.append({ - key: value.get_hash() for key, value in self.get_inputs_dict().items() - if key not in self._hash_ignored_inputs - }) - return res diff 
--git a/aiida/orm/implementation/general/node.py b/aiida/orm/implementation/general/node.py index 062a2f4e41..127650bed9 100644 --- a/aiida/orm/implementation/general/node.py +++ b/aiida/orm/implementation/general/node.py @@ -1479,13 +1479,13 @@ def __del__(self): self._temp_folder.erase() - def get_hash(self, ignore_errors=True): + def get_hash(self, ignore_errors=True, **kwargs): """ Making a hash based on my attributes """ from aiida.common.hashing import make_hash try: - return make_hash(self._get_objects_to_hash()) + return make_hash(self._get_objects_to_hash(), **kwargs) except Exception as e: if ignore_errors: return None From f1d7171acae306be4a5d54a01a716d8928cb431c Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Sun, 20 Aug 2017 23:48:14 +0200 Subject: [PATCH 047/101] Add re-hash command --- aiida/cmdline/commands/rehash.py | 34 ++++++++++++++++++++++++ aiida/cmdline/verdilib.py | 1 + aiida/orm/implementation/django/node.py | 4 +-- aiida/orm/implementation/general/node.py | 3 +++ 4 files changed, 40 insertions(+), 2 deletions(-) create mode 100644 aiida/cmdline/commands/rehash.py diff --git a/aiida/cmdline/commands/rehash.py b/aiida/cmdline/commands/rehash.py new file mode 100644 index 0000000000..044a7980cc --- /dev/null +++ b/aiida/cmdline/commands/rehash.py @@ -0,0 +1,34 @@ +import click + +from plum.util import load_class + +from aiida import try_load_dbenv +from aiida.cmdline.baseclass import VerdiCommand + + +class Rehash(VerdiCommand): + """ + Re-hash all nodes. + """ + def run(self, *args): + ctx = _rehash_cmd.make_context('rehash', list(args)) + with ctx: + _rehash_cmd.invoke(ctx) + + def complete(self, subargs_idx, subargs): + """ + No completion after 'verdi rehash'. + """ + print "" + +@click.command('rehash') +@click.argument('classname', type=str, default='aiida.orm.node.Node') +def _rehash_cmd(classname): + try_load_dbenv() + from aiida.orm.querybuilder import QueryBuilder + node_class = load_class(classname) + qb = QueryBuilder() + qb.append(node_class) + for i, n in enumerate(qb.iterall()): + n[0].rehash() + click.echo('All done! 
{} nodes re-hashed.'.format(i + 1)) diff --git a/aiida/cmdline/verdilib.py b/aiida/cmdline/verdilib.py index 44c433407f..47ad2bee84 100644 --- a/aiida/cmdline/verdilib.py +++ b/aiida/cmdline/verdilib.py @@ -53,6 +53,7 @@ class inheriting from VerdiCommand, and define a run(self,*args) method from aiida.cmdline.commands.comment import Comment from aiida.cmdline.commands.shell import Shell from aiida.cmdline.commands.restapi import Restapi +from aiida.cmdline.commands.rehash import Rehash from aiida.cmdline import execname diff --git a/aiida/orm/implementation/django/node.py b/aiida/orm/implementation/django/node.py index c1d97e312a..cce213c1e3 100644 --- a/aiida/orm/implementation/django/node.py +++ b/aiida/orm/implementation/django/node.py @@ -665,11 +665,11 @@ def store(self, with_transaction=True, use_cache=False): g = Group.get_or_create(name=group_name, type_string=grouptype)[0] g.add_nodes(self) - # This is useful because in this way I can do - # n = Node().store() from aiida.backends.djsite.db.models import DbExtra # I store the hash without cleaning and without incrementing the nodeversion number DbExtra.set_value_for_node(self.dbnode, 'hash', self.get_hash()) + # This is useful because in this way I can do + # n = Node().store() return self @property diff --git a/aiida/orm/implementation/general/node.py b/aiida/orm/implementation/general/node.py index 127650bed9..0720e20f89 100644 --- a/aiida/orm/implementation/general/node.py +++ b/aiida/orm/implementation/general/node.py @@ -1504,6 +1504,9 @@ def _get_objects_to_hash(self): self.folder ] + def rehash(self): + self.set_extra('hash', self.get_hash()) + def get_same_node(self): from aiida.orm.querybuilder import QueryBuilder From 7b460abf99b72d7bacb08e3b1c726dc8103a923b Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Sun, 20 Aug 2017 23:50:59 +0200 Subject: [PATCH 048/101] Add '.' for every 100 re-hashed nodes --- aiida/cmdline/commands/rehash.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/aiida/cmdline/commands/rehash.py b/aiida/cmdline/commands/rehash.py index 044a7980cc..383fec876a 100644 --- a/aiida/cmdline/commands/rehash.py +++ b/aiida/cmdline/commands/rehash.py @@ -30,5 +30,7 @@ def _rehash_cmd(classname): qb = QueryBuilder() qb.append(node_class) for i, n in enumerate(qb.iterall()): + if i % 100 == 0: + click.echo('.', nl=False) n[0].rehash() - click.echo('All done! {} nodes re-hashed.'.format(i + 1)) + click.echo('\nAll done! 
{} nodes re-hashed.'.format(i + 1)) From 1df91282df285c80e89485d7e5513d871f4d5e9d Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Mon, 21 Aug 2017 10:53:20 +0200 Subject: [PATCH 049/101] Get only INPUT link in get_inputs_dict for objects to hash --- aiida/orm/implementation/general/calculation/__init__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/aiida/orm/implementation/general/calculation/__init__.py b/aiida/orm/implementation/general/calculation/__init__.py index 7a23240cc2..34bc6bb940 100644 --- a/aiida/orm/implementation/general/calculation/__init__.py +++ b/aiida/orm/implementation/general/calculation/__init__.py @@ -348,7 +348,10 @@ def _is_valid_cache(self): def _get_objects_to_hash(self): res = super(AbstractCalculation, self)._get_objects_to_hash() res.append({ - key: value.get_hash() for key, value in self.get_inputs_dict().items() + key: value.get_hash() + for key, value in self.get_inputs_dict( + link_type=LinkType.INPUT + ).items() if key not in self._hash_ignored_inputs }) return res From 58b21951907db3f7975feedfabfdeb325cd77cd6 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Mon, 21 Aug 2017 12:09:05 +0200 Subject: [PATCH 050/101] Add 'CALL' to ignored inputs --- aiida/orm/implementation/general/calculation/__init__.py | 6 ++---- aiida/orm/implementation/general/node.py | 6 +++--- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/aiida/orm/implementation/general/calculation/__init__.py b/aiida/orm/implementation/general/calculation/__init__.py index 34bc6bb940..f662171a82 100644 --- a/aiida/orm/implementation/general/calculation/__init__.py +++ b/aiida/orm/implementation/general/calculation/__init__.py @@ -90,10 +90,8 @@ def _hash_ignored_attributes(cls): '_finished', ] - _hash_ignored_inputs = [] - - # A tuple with attributes that can be updated even after - # the call of the store() method + # The link_type might not be correct while the object is being created. + _hash_ignored_inputs = ['CALL'] # Nodes that can be added as input using the use_* methods @classproperty diff --git a/aiida/orm/implementation/general/node.py b/aiida/orm/implementation/general/node.py index 0720e20f89..56a94e2277 100644 --- a/aiida/orm/implementation/general/node.py +++ b/aiida/orm/implementation/general/node.py @@ -1377,7 +1377,7 @@ def get_abs_path(self, path=None, section=None): reset_limit=True).get_abs_path(path, check_existence=True) @abstractmethod - def store_all(self, with_transaction=True): + def store_all(self, with_transaction=True, use_cache=False): """ Store the node, together with all input links, if cached, and also the linked nodes, if they were not stored yet. @@ -1447,7 +1447,7 @@ def _store_cached_input_links(self, with_transaction=True): pass @abstractmethod - def store(self, with_transaction=True): + def store(self, with_transaction=True, use_cache=False): """ Store a new node in the DB, also saving its repository directory and attributes. @@ -1513,7 +1513,7 @@ def get_same_node(self): hash_ = self.get_hash() if hash_: qb = QueryBuilder() - qb.append(self.__class__, filters={'extras.hash':hash_}, project='*', subclassing=False) + qb.append(self.__class__, filters={'extras.hash': hash_}, project='*', subclassing=False) for same_node, in qb.iterall(): if same_node._is_valid_cache(): return same_node From 7df259fd61f4afaac86872eae48192e0d114563d Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Fri, 20 Oct 2017 00:34:21 +0200 Subject: [PATCH 051/101] Add option to specify PKs (or --all) in verdi rehash. 
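With this change, verdi rehash accepts an explicit list of node PKs, or the '--all' flag combined with an optional '--class-name' restriction (see the diff below). A minimal sketch of exercising the click command directly, assuming a configured AiiDA profile; CliRunner is click's standard testing helper, not part of this patch, and the PKs are placeholder values:

    from click.testing import CliRunner
    from aiida.cmdline.commands.rehash import _rehash_cmd

    runner = CliRunner()
    # re-hash two specific nodes, identified by placeholder PKs
    result = runner.invoke(_rehash_cmd, ['12', '45'])
    # re-hash every stored node of a given class
    result = runner.invoke(_rehash_cmd, ['--all', '--class-name', 'aiida.orm.data.Data'])
    print(result.output)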
--- aiida/cmdline/commands/rehash.py | 33 +++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/aiida/cmdline/commands/rehash.py b/aiida/cmdline/commands/rehash.py index 383fec876a..1fdc63d419 100644 --- a/aiida/cmdline/commands/rehash.py +++ b/aiida/cmdline/commands/rehash.py @@ -1,6 +1,9 @@ +import sys + import click from plum.util import load_class +from plum.exceptions import ClassNotFoundException from aiida import try_load_dbenv from aiida.cmdline.baseclass import VerdiCommand @@ -22,15 +25,35 @@ def complete(self, subargs_idx, subargs): print "" @click.command('rehash') -@click.argument('classname', type=str, default='aiida.orm.node.Node') -def _rehash_cmd(classname): +@click.option('--all', '-a', is_flag=True, help='Rehash all nodes of the given Node class.') +@click.option('--class-name', type=str, default='aiida.orm.node.Node', help='Restrict nodes which are re-hashed to instances of this class.') +@click.argument('pks', type=int, nargs=-1) +def _rehash_cmd(all, class_name, pks): try_load_dbenv() from aiida.orm.querybuilder import QueryBuilder - node_class = load_class(classname) + + # Get the Node class to match + try: + node_class = load_class(class_name) + except ClassNotFoundException: + click.echo("Could not load class '{}'.\nAborted!".format(class_name)) + sys.exit(1) + + # Add the filters for the class and PKs. qb = QueryBuilder() - qb.append(node_class) + qb.append(node_class, tag='node') + if pks: + qb.add_filter('node', {'id': {'in': pks}}) + else: + if not all: + click.echo("Nothing specified, nothing re-hashed.\nExplicitly specify the PK of the nodes, or use '--all'.") + return + + i = -1 for i, n in enumerate(qb.iterall()): if i % 100 == 0: click.echo('.', nl=False) n[0].rehash() - click.echo('\nAll done! {} nodes re-hashed.'.format(i + 1)) + if i > 0: + click.echo('') + click.echo('All done! {} nodes re-hashed.'.format(i + 1)) From 4168fa0ea4456be33eac2162a7f2fb1f4b0d9503 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Fri, 20 Oct 2017 09:22:37 +0200 Subject: [PATCH 052/101] Simplify verdi rehash code --- aiida/cmdline/commands/rehash.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/aiida/cmdline/commands/rehash.py b/aiida/cmdline/commands/rehash.py index 1fdc63d419..3862d6d04a 100644 --- a/aiida/cmdline/commands/rehash.py +++ b/aiida/cmdline/commands/rehash.py @@ -49,11 +49,11 @@ def _rehash_cmd(all, class_name, pks): click.echo("Nothing specified, nothing re-hashed.\nExplicitly specify the PK of the nodes, or use '--all'.") return - i = -1 - for i, n in enumerate(qb.iterall()): + if not qb.count(): + click.echo('No matching nodes found.') + return + for i, (node,) in enumerate(qb.iterall()): if i % 100 == 0: click.echo('.', nl=False) - n[0].rehash() - if i > 0: - click.echo('') - click.echo('All done! {} nodes re-hashed.'.format(i + 1)) + node.rehash() + click.echo('\nAll done! 
{} node(s) re-hashed.'.format(i + 1)) From 537cd893a9c2be83717df759a1871e64f822cb3d Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Fri, 27 Oct 2017 17:10:22 +0200 Subject: [PATCH 053/101] Run pre-commit --- docs/requirements_for_rtd.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/requirements_for_rtd.txt b/docs/requirements_for_rtd.txt index 42b0dd6f0a..210288e5e8 100644 --- a/docs/requirements_for_rtd.txt +++ b/docs/requirements_for_rtd.txt @@ -27,10 +27,10 @@ pycrypto==2.6.1 python-dateutil==2.6.0 python-mimeparse==0.1.4 pytz==2014.10 +pyyaml reentry==1.0.2 scipy<1.0.0 setuptools==36.6.0 -singledispatch==3.4.0.3 six==1.10.0 supervisor==3.1.3 tabulate==0.7.5 From d809be64ae92f6037aa4029b5b87aa91b6d567f2 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Mon, 30 Oct 2017 14:07:42 +0100 Subject: [PATCH 054/101] Update plumpy requirement to 0.7.11 --- .travis.yml | 2 +- setup_requirements.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 3a0a7f261e..ca8d7221b9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -44,7 +44,7 @@ install: # Install AiiDA with some optional dependencies - pip install .[REST,docs,atomic_tools,testing,dev_precommit] # Unreleased plum version required for fast-forwarding - - pip install git+https://bitbucket.org/greschd/plum@edge + - pip install git+https://bitbucket.org/greschd/plum@submit_with_process_type env: ## Build matrix to test both backends, and the docs diff --git a/setup_requirements.py b/setup_requirements.py index dd13bbda08..6e6fc311e3 100644 --- a/setup_requirements.py +++ b/setup_requirements.py @@ -41,7 +41,7 @@ 'supervisor==3.1.3', 'meld3==1.0.0', 'numpy==1.12.0', - 'plumpy==0.7.10', + 'plumpy==0.7.11', 'portalocker==1.1.0', 'SQLAlchemy==1.0.12', # upgrade to SQLalchemy 1.1.5 does break tests, see #465 'SQLAlchemy-Utils==0.31.2', From 5a63309f4fc267906590f8a838c3b455f3477780 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Mon, 30 Oct 2017 14:13:26 +0100 Subject: [PATCH 055/101] Install plumpy before AiiDA on travis build --- .travis.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index ca8d7221b9..55a81aa7bd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -41,10 +41,11 @@ before_install: install: # Upgrade pip setuptools and wheel to be able to run the next command - pip install -U pip wheel setuptools + # Unreleased plum version required for fast-forwarding + - pip install git+https://github.com/greschd/plumpy@submit_with_process_type # Install AiiDA with some optional dependencies - pip install .[REST,docs,atomic_tools,testing,dev_precommit] - # Unreleased plum version required for fast-forwarding - - pip install git+https://bitbucket.org/greschd/plum@submit_with_process_type + env: ## Build matrix to test both backends, and the docs From 42cde395a87aadcdb53bfb8ed22aee19077c4a45 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Mon, 30 Oct 2017 14:25:33 +0100 Subject: [PATCH 056/101] Update RTD requirements --- docs/requirements_for_rtd.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/requirements_for_rtd.txt b/docs/requirements_for_rtd.txt index 210288e5e8..0907ecb0da 100644 --- a/docs/requirements_for_rtd.txt +++ b/docs/requirements_for_rtd.txt @@ -21,7 +21,7 @@ paramiko==2.1.2 passlib==1.7.1 pathlib2==2.3.0 pip==9.0.1 -plumpy==0.7.10 +plumpy==0.7.11 portalocker==1.1.0 pycrypto==2.6.1 python-dateutil==2.6.0 From e96cd6b01c26de3984b0bf93fe8c5914a001117f Mon Sep 17 00:00:00 2001 From: 
Dominik Gresch
Date: Mon, 30 Oct 2017 17:12:23 +0100
Subject: [PATCH 057/101] Ignore '_updatable_attributes' in addition to '_hash_ignored_attributes'.
---
 aiida/backends/tests/nodes.py                        | 12 ++++++++++++
 .../general/calculation/job/__init__.py              |  9 +--------
 aiida/orm/implementation/general/node.py             |  5 ++++-
 3 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/aiida/backends/tests/nodes.py b/aiida/backends/tests/nodes.py
index 6386bcffa9..52205de3e6 100644
--- a/aiida/backends/tests/nodes.py
+++ b/aiida/backends/tests/nodes.py
@@ -20,6 +20,7 @@
 from aiida.common.exceptions import ModificationNotAllowed, UniquenessError
 from aiida.common.links import LinkType
 from aiida.common import caching
+from aiida.orm.code import Code
 from aiida.orm.data import Data
 from aiida.orm.node import Node
 from aiida.orm.utils import load_node
@@ -119,6 +120,17 @@ def create_arraydata(arr):
         a2.store(use_cache=True)
         self.assertNotEquals(a1.uuid, a2.uuid)

+    def test_updatable_attributes(self):
+        """
+        Tests that updatable attributes are ignored.
+        """
+        c = Code()
+        hash1 = c.get_hash()
+        c._hide()
+        hash2 = c.get_hash()
+        self.assertNotEquals(hash1, None)
+        self.assertEquals(hash1, hash2)
+
 class TestDataNode(AiidaTestCase):
     """
     These tests check the features of Data nodes that differ from the base Node

diff --git a/aiida/orm/implementation/general/calculation/job/__init__.py b/aiida/orm/implementation/general/calculation/job/__init__.py
index 256c76d510..2e99154060 100644
--- a/aiida/orm/implementation/general/calculation/job/__init__.py
+++ b/aiida/orm/implementation/general/calculation/job/__init__.py
@@ -38,17 +38,10 @@ class AbstractJobCalculation(object):

     @classproperty
     def _hash_ignored_attributes(cls):
+        # _updatable_attributes are ignored automatically.
return super(AbstractJobCalculation, cls)._hash_ignored_attributes + [ - 'state', - 'scheduler_state', - 'scheduler_lastchecktime', - 'remote_workdir', - 'last_jobinfo', - 'job_id', 'queue_name', 'max_wallclock_seconds', - 'retrieve_list', - 'retrieve_singlefile_list', ] def get_hash( diff --git a/aiida/orm/implementation/general/node.py b/aiida/orm/implementation/general/node.py index bb78ee43da..910f3ab600 100644 --- a/aiida/orm/implementation/general/node.py +++ b/aiida/orm/implementation/general/node.py @@ -1641,7 +1641,10 @@ def _get_objects_to_hash(self): ).__version__, { key: val for key, val in self.get_attrs().items() - if key not in self._hash_ignored_attributes + if ( + (key not in self._hash_ignored_attributes) and + (key not in getattr(self, '_updatable_attributes', tuple())) + ) }, self.folder ] From afbb3c586f692e94b343db946c92928779cd2871 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Tue, 31 Oct 2017 13:51:03 +0100 Subject: [PATCH 058/101] Enable use of the cache_config for use_cache --- aiida/common/caching.py | 77 +++++++++++++++------ aiida/orm/implementation/django/node.py | 7 +- aiida/orm/implementation/general/node.py | 8 +-- aiida/orm/implementation/sqlalchemy/node.py | 7 +- aiida/work/process.py | 4 +- 5 files changed, 71 insertions(+), 32 deletions(-) diff --git a/aiida/common/caching.py b/aiida/common/caching.py index b062de52bd..7b7cc5963e 100644 --- a/aiida/common/caching.py +++ b/aiida/common/caching.py @@ -8,11 +8,17 @@ except ImportError: from chainmap import ChainMap +from plum.util import load_class +from future.utils import raise_from +from plum.exceptions import ClassNotFoundException + +import aiida +from aiida.common.exceptions import ConfigurationError from aiida.common.extendeddicts import Enumerate from aiida.common.setup import AIIDA_CONFIG_FOLDER from aiida.backends.utils import get_current_profile -__all__ = ['CONFIG'] +__all__ = ['get_fast_forward_enabled', 'get_use_cache_default'] config_keys = Enumerate(( 'use_cache', 'fast_forward', 'default', 'enabled', 'disabled' @@ -27,39 +33,66 @@ } } -def _get_config(): +def _get_config(config_file): try: - with open(os.path.join(os.path.expanduser(AIIDA_CONFIG_FOLDER), 'cache_config.yml'), 'r') as f: + with open(config_file, 'r') as f: config = yaml.load(f)[get_current_profile()] - # validate configuration - for key, value in config.items(): - error_msg = "Configuration error: Invalid key '{}' in cache_config.yml" - if key not in DEFAULT_CONFIG: - raise ValueError(error_msg.format(key)) - for sub_key in value: - if sub_key not in DEFAULT_CONFIG[key]: - raise ValueError(error_msg.format(sub_key)) - - # add defaults where config is missing - for key, default_config in DEFAULT_CONFIG.items(): - config[key] = ChainMap(config.get(key, {}), default_config) - return config - # no config file, or no config for this profile except (OSError, IOError, KeyError): return DEFAULT_CONFIG -CONFIG = _get_config() + # validate configuration + for key, value in config.items(): + error_msg = "Configuration error: Invalid key '{}' in cache_config.yml" + if key not in DEFAULT_CONFIG: + raise ValueError(error_msg.format(key)) + for sub_key in value: + if sub_key not in DEFAULT_CONFIG[key]: + raise ValueError(error_msg.format(sub_key)) + + # add defaults where config is missing + for key, default_config in DEFAULT_CONFIG.items(): + config[key] = ChainMap(config.get(key, {}), default_config) + for sub_key, sub_default_config in default_config.items(): + config[key][sub_key] = config[key].get(sub_key, sub_default_config) + 
+ # load classes + aiida.try_load_dbenv() + try: + for config_part in config.values(): + for key in [config_keys.enabled, config_keys.disabled]: + if key in config_part: + config_part[key] = [load_class(c) for c in config_part[key]] + except (ImportError, ClassNotFoundException) as err: + raise_from( + ConfigurationError("Unknown class given in 'cache_config.yml': '{}'".format(err)), + err + ) + return config + +CONFIG = {} + +def configure(config_file=os.path.join(os.path.expanduser(AIIDA_CONFIG_FOLDER), 'cache_config.yml')): + """ + Reads the caching configuration file and sets the CONFIG variable. + """ + global CONFIG + CONFIG.clear() + CONFIG.update(_get_config(config_file=config_file)) def get_use_cache_default(): + if not CONFIG: + configure() return CONFIG[config_keys.use_cache][config_keys.default] -def get_fast_forward_enabled(class_name): +def get_fast_forward_enabled(calc_class): + if not CONFIG: + configure() fast_forward_config = CONFIG[config_keys.fast_forward] - enabled = class_name in fast_forward_config[config_keys.enabled] - disabled = class_name in fast_forward_config[config_keys.disabled] + enabled = calc_class in fast_forward_config[config_keys.enabled] + disabled = calc_class in fast_forward_config[config_keys.disabled] if enabled and disabled: - raise ValueError('Invalid configuration: Fast-forwarding for {} is both enabled and disabled.'.format(class_name)) + raise ValueError('Invalid configuration: Fast-forwarding for {} is both enabled and disabled.'.format(calc_class)) elif enabled: return True elif disabled: diff --git a/aiida/orm/implementation/django/node.py b/aiida/orm/implementation/django/node.py index b79c8938a1..56e75a4807 100644 --- a/aiida/orm/implementation/django/node.py +++ b/aiida/orm/implementation/django/node.py @@ -21,6 +21,7 @@ from aiida.common.folders import RepositoryFolder from aiida.common.links import LinkType from aiida.common.utils import get_new_uuid +from aiida.common.caching import get_use_cache_default from aiida.orm.implementation.general.node import AbstractNode, _NO_DEFAULT from aiida.orm.mixins import Sealable # from aiida.orm.implementation.django.utils import get_db_columns @@ -468,7 +469,7 @@ def dbnode(self): # self._dbnode = DbNode.objects.get(pk=self._dbnode.pk) return self._dbnode - def _db_store_all(self, with_transaction=True, use_cache=False): + def _db_store_all(self, with_transaction=True, use_cache=None): """ Store the node, together with all input links, if cached, and also the linked nodes, if they were not stored yet. @@ -544,7 +545,7 @@ def _store_cached_input_links(self, with_transaction=True): # would have been raised, and the following lines are not executed) self._inputlinks_cache.clear() - def _db_store(self, with_transaction=True, use_cache=False): + def _db_store(self, with_transaction=True, use_cache=None): """ Store a new node in the DB, also saving its repository directory and attributes. 
@@ -571,6 +572,8 @@ def _db_store(self, with_transaction=True, use_cache=False): from aiida.backends.djsite.db.models import DbAttribute import aiida.orm.autogroup + if use_cache is None: + use_cache = get_use_cache_default() if use_cache: same_node = self.get_same_node() if same_node is not None: diff --git a/aiida/orm/implementation/general/node.py b/aiida/orm/implementation/general/node.py index 910f3ab600..5dc1fbb84f 100644 --- a/aiida/orm/implementation/general/node.py +++ b/aiida/orm/implementation/general/node.py @@ -1439,7 +1439,7 @@ def get_abs_path(self, path=None, section=None): section, reset_limit=True).get_abs_path( path, check_existence=True) - def store_all(self, with_transaction=True, use_cache=False): + def store_all(self, with_transaction=True, use_cache=None): """ Store the node, together with all input links, if cached, and also the linked nodes, if they were not stored yet. @@ -1467,7 +1467,7 @@ def store_all(self, with_transaction=True, use_cache=False): return self._db_store_all(with_transaction, use_cache=use_cache) @abstractmethod - def _db_store_all(self, with_transaction=True, use_cache=False): + def _db_store_all(self, with_transaction=True, use_cache=None): """ Store the node, together with all input links, if cached, and also the linked nodes, if they were not stored yet. @@ -1538,7 +1538,7 @@ def _store_cached_input_links(self, with_transaction=True): """ pass - def store(self, with_transaction=True, use_cache=False): + def store(self, with_transaction=True, use_cache=None): """ Store a new node in the DB, also saving its repository directory and attributes. @@ -1591,7 +1591,7 @@ def store(self, with_transaction=True, use_cache=False): return self @abstractmethod - def _db_store(self, with_transaction=True, use_cache=False): + def _db_store(self, with_transaction=True, use_cache=None): """ Store a new node in the DB, also saving its repository directory and attributes. diff --git a/aiida/orm/implementation/sqlalchemy/node.py b/aiida/orm/implementation/sqlalchemy/node.py index d8601a0579..52c8f058bd 100644 --- a/aiida/orm/implementation/sqlalchemy/node.py +++ b/aiida/orm/implementation/sqlalchemy/node.py @@ -27,6 +27,7 @@ from aiida.common.exceptions import (InternalError, ModificationNotAllowed, NotExistent, UniquenessError) from aiida.common.links import LinkType +from aiida.common.caching import get_use_cache_default from aiida.orm.implementation.general.node import AbstractNode, _NO_DEFAULT from aiida.orm.implementation.sqlalchemy.computer import Computer @@ -508,7 +509,7 @@ def id(self): def dbnode(self): return self._dbnode - def _db_store_all(self, with_transaction=True, use_cache=False): + def _db_store_all(self, with_transaction=True, use_cache=None): """ Store the node, together with all input links, if cached, and also the linked nodes, if they were not stored yet. @@ -585,7 +586,7 @@ def _store_cached_input_links(self, with_transaction=True): session.rollback() raise - def _db_store(self, with_transaction=True, use_cache=False): + def _db_store(self, with_transaction=True, use_cache=None): """ Store a new node in the DB, also saving its repository directory and attributes. @@ -610,6 +611,8 @@ def _db_store(self, with_transaction=True, use_cache=False): # TODO: This needs to be generalized, allowing for flexible methods # for storing data and its attributes. 
+ if use_cache is None: + use_cache = get_use_cache_default() # For node hashing, if use_cache is true: if use_cache: same_node = self.get_same_node() diff --git a/aiida/work/process.py b/aiida/work/process.py index 7b7f67e1d8..457f1c41e0 100644 --- a/aiida/work/process.py +++ b/aiida/work/process.py @@ -412,8 +412,8 @@ def _fast_forward_enabled(self): # Second priority: config except KeyError: return ( - caching.get_fast_forward_enabled(type(self).__name__) or - caching.get_fast_forward_enabled(type(self._calc).__name__) + caching.get_fast_forward_enabled(type(self)) or + caching.get_fast_forward_enabled(type(self._calc)) ) class FunctionProcess(Process): From 28e4d60b29692d405e2e0a70079d3518db652ecc Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Tue, 31 Oct 2017 15:21:21 +0100 Subject: [PATCH 059/101] Add tests for caching configuration. --- aiida/common/test_caching.py | 52 ++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 aiida/common/test_caching.py diff --git a/aiida/common/test_caching.py b/aiida/common/test_caching.py new file mode 100644 index 0000000000..15087b9337 --- /dev/null +++ b/aiida/common/test_caching.py @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- +########################################################################### +# Copyright (c), The AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. # +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida_core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### +import unittest +import tempfile + +import yaml + +from aiida.backends.utils import get_current_profile +from aiida.common.caching import configure, get_fast_forward_enabled, get_use_cache_default + +class CacheConfigTest(unittest.TestCase): + """ + Tests the caching configuration. + """ + def setUp(self): + """ + Write a temporary config file, and load the configuration. 
+ """ + self.config_reference = { + get_current_profile(): { + 'use_cache': {'default': True}, + 'fast_forward': { + 'default': True, + 'enabled': [], + 'disabled': ['aiida.orm.calculation.job.simpleplugins.templatereplacer.TemplatereplacerCalculation'] + } + } + } + with tempfile.NamedTemporaryFile() as tf, open(tf.name, 'w') as of: + yaml.dump(self.config_reference, of) + configure(config_file=tf.name) + + def tearDown(self): + configure() + + def test_use_cache_default(self): + self.assertTrue(get_use_cache_default()) + + def test_fast_forward_default(self): + from aiida.orm.calculation.job import JobCalculation + self.assertTrue(get_fast_forward_enabled(JobCalculation)) + + def test_fast_forward_disabled(self): + from aiida.orm.calculation.job.simpleplugins.templatereplacer import TemplatereplacerCalculation + self.assertFalse(get_fast_forward_enabled(TemplatereplacerCalculation)) From 18cf330c033de61048ae1c6a6a7ba104ca5875ad Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Tue, 31 Oct 2017 17:14:52 +0100 Subject: [PATCH 060/101] Move caching config tests to db tests --- aiida/backends/tests/__init__.py | 1 + .../test_caching.py => backends/tests/test_caching_config.py} | 0 aiida/common/caching.py | 3 +-- 3 files changed, 2 insertions(+), 2 deletions(-) rename aiida/{common/test_caching.py => backends/tests/test_caching_config.py} (100%) diff --git a/aiida/backends/tests/__init__.py b/aiida/backends/tests/__init__.py index f7c209b3ce..6685d9dc59 100644 --- a/aiida/backends/tests/__init__.py +++ b/aiida/backends/tests/__init__.py @@ -63,6 +63,7 @@ 'pluginloader': ['aiida.backends.tests.test_plugin_loader'], 'daemon': ['aiida.backends.tests.daemon'], 'verdi_commands': ['aiida.backends.tests.verdi_commands'], + 'caching_config': ['aiida.backends.tests.test_caching_config'], } } diff --git a/aiida/common/test_caching.py b/aiida/backends/tests/test_caching_config.py similarity index 100% rename from aiida/common/test_caching.py rename to aiida/backends/tests/test_caching_config.py diff --git a/aiida/common/caching.py b/aiida/common/caching.py index 7b7cc5963e..0fc869571d 100644 --- a/aiida/common/caching.py +++ b/aiida/common/caching.py @@ -52,12 +52,11 @@ def _get_config(config_file): # add defaults where config is missing for key, default_config in DEFAULT_CONFIG.items(): - config[key] = ChainMap(config.get(key, {}), default_config) + config[key] = config.get(key, {}) for sub_key, sub_default_config in default_config.items(): config[key][sub_key] = config[key].get(sub_key, sub_default_config) # load classes - aiida.try_load_dbenv() try: for config_part in config.values(): for key in [config_keys.enabled, config_keys.disabled]: From 050eb385529bcc2cabdb29bd1c937765d8baf3fb Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Tue, 31 Oct 2017 21:59:36 +0100 Subject: [PATCH 061/101] Implement cache lookup in general/node.py instead of db implementations --- aiida/orm/implementation/django/node.py | 13 +------------ aiida/orm/implementation/general/node.py | 19 +++++++++++++++---- aiida/orm/implementation/sqlalchemy/node.py | 14 +------------- 3 files changed, 17 insertions(+), 29 deletions(-) diff --git a/aiida/orm/implementation/django/node.py b/aiida/orm/implementation/django/node.py index 56e75a4807..576935f420 100644 --- a/aiida/orm/implementation/django/node.py +++ b/aiida/orm/implementation/django/node.py @@ -21,7 +21,6 @@ from aiida.common.folders import RepositoryFolder from aiida.common.links import LinkType from aiida.common.utils import get_new_uuid -from aiida.common.caching 
import get_use_cache_default from aiida.orm.implementation.general.node import AbstractNode, _NO_DEFAULT from aiida.orm.mixins import Sealable # from aiida.orm.implementation.django.utils import get_db_columns @@ -545,7 +544,7 @@ def _store_cached_input_links(self, with_transaction=True): # would have been raised, and the following lines are not executed) self._inputlinks_cache.clear() - def _db_store(self, with_transaction=True, use_cache=None): + def _db_store(self, with_transaction=True): """ Store a new node in the DB, also saving its repository directory and attributes. @@ -572,16 +571,6 @@ def _db_store(self, with_transaction=True, use_cache=None): from aiida.backends.djsite.db.models import DbAttribute import aiida.orm.autogroup - if use_cache is None: - use_cache = get_use_cache_default() - if use_cache: - same_node = self.get_same_node() - if same_node is not None: - self._dbnode = same_node.dbnode - self._to_be_stored = False - self._repo_folder = same_node._repo_folder - return self - if with_transaction: context_man = transaction.atomic() else: diff --git a/aiida/orm/implementation/general/node.py b/aiida/orm/implementation/general/node.py index 5dc1fbb84f..44ef4de968 100644 --- a/aiida/orm/implementation/general/node.py +++ b/aiida/orm/implementation/general/node.py @@ -24,7 +24,7 @@ from aiida.common.folders import SandboxFolder from aiida.common.utils import abstractclassmethod from aiida.common.utils import combomethod - +from aiida.common.caching import get_use_cache_default from aiida.common.links import LinkType from aiida.common.lang import override from aiida.common.old_pluginloader import get_query_type_string @@ -1567,8 +1567,19 @@ def store(self, with_transaction=True, use_cache=None): # the case. self._check_are_parents_stored() - # call implementation-dependent store method - self._db_store(with_transaction, use_cache=use_cache) + + # Get default for use_cache if it's not set explicitly. + if use_cache is None: + use_cache = get_use_cache_default() + # Retrieve the cached node. + same_node = self.get_same_node() if use_cache else None + if same_node is not None: + self._dbnode = same_node.dbnode + self._to_be_stored = False + self._repo_folder = same_node._repo_folder + else: + # call implementation-dependent store method + self._db_store(with_transaction) # Set up autogrouping used by verdi run from aiida.orm.autogroup import current_autogroup, Autogroup, VERDIAUTOGROUP_TYPE @@ -1591,7 +1602,7 @@ def store(self, with_transaction=True, use_cache=None): return self @abstractmethod - def _db_store(self, with_transaction=True, use_cache=None): + def _db_store(self, with_transaction=True): """ Store a new node in the DB, also saving its repository directory and attributes. 
diff --git a/aiida/orm/implementation/sqlalchemy/node.py b/aiida/orm/implementation/sqlalchemy/node.py
index 52c8f058bd..7a2ae8f547 100644
--- a/aiida/orm/implementation/sqlalchemy/node.py
+++ b/aiida/orm/implementation/sqlalchemy/node.py
@@ -27,7 +27,6 @@
 from aiida.common.exceptions import (InternalError, ModificationNotAllowed,
                                      NotExistent, UniquenessError)
 from aiida.common.links import LinkType
-from aiida.common.caching import get_use_cache_default
 from aiida.orm.implementation.general.node import AbstractNode, _NO_DEFAULT
 from aiida.orm.implementation.sqlalchemy.computer import Computer
@@ -586,7 +585,7 @@ def _store_cached_input_links(self, with_transaction=True):
             session.rollback()
             raise

-    def _db_store(self, with_transaction=True, use_cache=None):
+    def _db_store(self, with_transaction=True):
         """
         Store a new node in the DB, also saving its repository directory
         and attributes.
@@ -611,17 +610,6 @@
         # TODO: This needs to be generalized, allowing for flexible methods
         # for storing data and its attributes.

-        if use_cache is None:
-            use_cache = get_use_cache_default()
-        # For node hashing, if use_cache is true:
-        if use_cache:
-            same_node = self.get_same_node()
-            if same_node is not None:
-                self._dbnode = same_node.dbnode
-                self._to_be_stored = False
-                self._repo_folder = same_node._repo_folder
-                return self
-
         # I save the corresponding django entry
         # I set the folder
         # NOTE: I first store the files, then only if this is successful,

From 74a358a8c9ee1dd68d90228b893ccf9e2ba110f2 Mon Sep 17 00:00:00 2001
From: Dominik Gresch
Date: Wed, 1 Nov 2017 11:51:37 +0100
Subject: [PATCH 062/101] Use 'copy' when retrieving cached Node

Add kwarg 'include_updatable_attrs', to not delete the updatable attributes
when copying.
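After this patch a cache hit no longer makes the new node share the matching node's database row and repository folder; the new node is instead populated from a copy of the cached one and records its origin in the 'cached_from' extra, as the updated tests below check. A minimal sketch of the resulting behaviour (attribute names and values are arbitrary):

    from aiida.orm.node import Node

    n1 = Node()
    n1._set_attr('a', 1.0)
    n1.store()

    n2 = Node()
    n2._set_attr('a', 1.0)
    n2.store(use_cache=True)

    # n2 is a distinct node whose content was copied from n1; the cache
    # hit is recorded in the 'cached_from' extra rather than a shared UUID
    assert n2.get_extra('cached_from') == n1.uuid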
--- aiida/backends/tests/nodes.py | 11 ++++++----- aiida/orm/implementation/django/node.py | 2 +- aiida/orm/implementation/general/node.py | 10 ++++++---- aiida/orm/implementation/sqlalchemy/node.py | 2 +- aiida/orm/mixins.py | 7 ++++--- 5 files changed, 18 insertions(+), 14 deletions(-) diff --git a/aiida/backends/tests/nodes.py b/aiida/backends/tests/nodes.py index 52205de3e6..52307ac087 100644 --- a/aiida/backends/tests/nodes.py +++ b/aiida/backends/tests/nodes.py @@ -20,7 +20,7 @@ from aiida.common.exceptions import ModificationNotAllowed, UniquenessError from aiida.common.links import LinkType from aiida.common import caching -from aiida.orm.code import Code +from aiida.orm.code import Code from aiida.orm.data import Data from aiida.orm.node import Node from aiida.orm.utils import load_node @@ -47,8 +47,7 @@ def test_simple_equal_nodes(self): n2 = self.create_simple_node(*attr) n1.store(use_cache=True) n2.store(use_cache=True) - self.assertEqual(n1.uuid, n2.uuid) - self.assertEqual(n1.folder.get_abs_path('.'), n2.folder.get_abs_path('.')) + self.assertEqual(n1.uuid, n2.get_extra('cached_from')) @staticmethod def create_folderdata_with_empty_file(): @@ -80,14 +79,14 @@ def test_folder_same(self): f2 = self.create_folderdata_with_empty_folder() f1.store() f2.store(use_cache=True) - assert f1.uuid == f2.uuid + assert f1.uuid == f2.get_extra('cached_from') def test_file_same(self): f1 = self.create_folderdata_with_empty_file() f2 = self.create_folderdata_with_empty_file() f1.store() f2.store(use_cache=True) - assert f1.uuid == f2.uuid + assert f1.uuid == f2.get_extra('cached_from') def test_simple_unequal_nodes(self): attributes = [ @@ -100,6 +99,7 @@ def test_simple_unequal_nodes(self): n1.store() n2.store(use_cache=True) self.assertNotEquals(n1.uuid, n2.uuid) + self.assertFalse('cached_from' in n2.extras()) def test_unequal_arrays(self): import numpy as np @@ -119,6 +119,7 @@ def create_arraydata(arr): a2 = create_arraydata(arr2) a2.store(use_cache=True) self.assertNotEquals(a1.uuid, a2.uuid) + self.assertFalse('cached_from' in a2.extras()) def test_updatable_attributes(self): """ diff --git a/aiida/orm/implementation/django/node.py b/aiida/orm/implementation/django/node.py index 576935f420..acb1028593 100644 --- a/aiida/orm/implementation/django/node.py +++ b/aiida/orm/implementation/django/node.py @@ -435,7 +435,7 @@ def _increment_version_number_db(self): # otherwise I only get the Django Field F object as a result! self._dbnode = DbNode.objects.get(pk=self._dbnode.pk) - def copy(self): + def copy(self, **kwargs): newobject = self.__class__() newobject.dbnode.type = self.dbnode.type # Inherit type newobject.dbnode.label = self.dbnode.label # Inherit label diff --git a/aiida/orm/implementation/general/node.py b/aiida/orm/implementation/general/node.py index 44ef4de968..c46d501f46 100644 --- a/aiida/orm/implementation/general/node.py +++ b/aiida/orm/implementation/general/node.py @@ -1267,7 +1267,7 @@ def _increment_version_number_db(self): pass @abstractmethod - def copy(self): + def copy(self, **kwargs): """ Return a copy of the current object to work with, not stored yet. @@ -1574,9 +1574,11 @@ def store(self, with_transaction=True, use_cache=None): # Retrieve the cached node. 
same_node = self.get_same_node() if use_cache else None if same_node is not None: - self._dbnode = same_node.dbnode - self._to_be_stored = False - self._repo_folder = same_node._repo_folder + new_node = same_node.copy(include_updatable_attrs=True) + self.__dict__ = new_node.__dict__ + # self._repo_folder = new_node._repo_folder + self.store(with_transaction=with_transaction, use_cache=False) + self.set_extra('cached_from', same_node.uuid) else: # call implementation-dependent store method self._db_store(with_transaction) diff --git a/aiida/orm/implementation/sqlalchemy/node.py b/aiida/orm/implementation/sqlalchemy/node.py index 7a2ae8f547..9a67319482 100644 --- a/aiida/orm/implementation/sqlalchemy/node.py +++ b/aiida/orm/implementation/sqlalchemy/node.py @@ -483,7 +483,7 @@ def _increment_version_number_db(self): raise - def copy(self): + def copy(self, **kwargs): newobject = self.__class__() newobject.dbnode.type = self.dbnode.type # Inherit type newobject.dbnode.label = self.dbnode.label # Inherit label diff --git a/aiida/orm/mixins.py b/aiida/orm/mixins.py index 08a7977067..9355ddeb8b 100644 --- a/aiida/orm/mixins.py +++ b/aiida/orm/mixins.py @@ -98,11 +98,12 @@ def iter_updatable_attrs(self): pass @override - def copy(self): + def copy(self, include_updatable_attrs=False): newobj = super(SealableWithUpdatableAttributes, self).copy() # Remove the updatable attributes - for k, v in self.iter_updatable_attrs(): - newobj._del_attr(k) + if not include_updatable_attrs: + for k, v in self.iter_updatable_attrs(): + newobj._del_attr(k) return newobj From fd09763cb4f6636bcb7a14e857ac555151acfd9b Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Wed, 1 Nov 2017 16:18:14 +0100 Subject: [PATCH 063/101] Keep inputlink_cache when creating cached Node. --- aiida/backends/tests/work/workChain.py | 26 ++++++++++++++++++------ aiida/orm/implementation/general/node.py | 3 ++- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/aiida/backends/tests/work/workChain.py b/aiida/backends/tests/work/workChain.py index d6ccd1b739..9758acb8e0 100644 --- a/aiida/backends/tests/work/workChain.py +++ b/aiida/backends/tests/work/workChain.py @@ -391,7 +391,10 @@ def test_fastforwarding(self): a=Int(1), b=Int(2), _fast_forward=True, _return_pid=True ) - self.assertEquals(pid, self.reference_pid) + self.assertEquals( + self.reference_wc.uuid, + load_node(pid).get_extra('cached_from') + ) self.assertEquals(res, self.reference_result) def test_fastforwarding_notexecuted(self): @@ -400,7 +403,10 @@ def test_fastforwarding_notexecuted(self): a=Int(1), b=Int(2), _fast_forward=True, _return_pid=True ) - self.assertEquals(pid, self.reference_pid) + self.assertEquals( + self.reference_wc.uuid, + load_node(pid).get_extra('cached_from') + ) self.assertEquals(res, self.reference_result) def test_fastforwarding_notexecuted_testworks(self): @@ -423,7 +429,12 @@ def test_fastforwarding_twice(self): a=Int(1), b=Int(2), _fast_forward=True, _return_pid=True ) - self.assertEquals(pid1, pid2) + wc1 = load_node(pid1) + wc2 = load_node(pid2) + self.assertEquals( + wc1.get_extra('cached_from', wc1.uuid), + wc2.get_extra('cached_from', wc2.uuid) + ) self.assertEquals(res1, res2) def test_fastforwarding_default(self): @@ -432,7 +443,10 @@ def test_fastforwarding_default(self): a=Int(1), _fast_forward=True, _return_pid=True ) - self.assertEquals(pid, self.reference_pid) + self.assertEquals( + load_node(pid).get_extra('cached_from'), + self.reference_wc.uuid + ) self.assertEquals(res, self.reference_result) def 
test_fastforwarding_different(self): @@ -441,7 +455,7 @@ def test_fastforwarding_different(self): a=Int(2), b=Int(1), _fast_forward=True, _return_pid=True ) - self.assertNotEquals(pid, self.reference_pid) + self.assertFalse('cached_from' in load_node(pid).extras()) self.assertNotEquals(res, self.reference_result) @@ -685,4 +699,4 @@ def test_simple_kill_through_node(self): self.assertEquals(future.process.calc.has_finished_ok(), False) self.assertEquals(future.process.calc.has_failed(), False) self.assertEquals(future.process.calc.has_aborted(), True) - engine.shutdown() \ No newline at end of file + engine.shutdown() diff --git a/aiida/orm/implementation/general/node.py b/aiida/orm/implementation/general/node.py index c46d501f46..b92559df6f 100644 --- a/aiida/orm/implementation/general/node.py +++ b/aiida/orm/implementation/general/node.py @@ -1567,7 +1567,6 @@ def store(self, with_transaction=True, use_cache=None): # the case. self._check_are_parents_stored() - # Get default for use_cache if it's not set explicitly. if use_cache is None: use_cache = get_use_cache_default() @@ -1575,7 +1574,9 @@ def store(self, with_transaction=True, use_cache=None): same_node = self.get_same_node() if use_cache else None if same_node is not None: new_node = same_node.copy(include_updatable_attrs=True) + inputlinks_cache = self._inputlinks_cache self.__dict__ = new_node.__dict__ + self._inputlinks_cache = inputlinks_cache # self._repo_folder = new_node._repo_folder self.store(with_transaction=with_transaction, use_cache=False) self.set_extra('cached_from', same_node.uuid) From 5646de9a293fc4d492b299b3a9f1d26f93bc29a1 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Mon, 6 Nov 2017 08:17:19 +0100 Subject: [PATCH 064/101] Add class-level '_cacheable' attribute --- .../general/calculation/__init__.py | 2 + .../general/calculation/inline.py | 2 + .../general/calculation/job/__init__.py | 2 + aiida/orm/implementation/general/node.py | 43 ++++++++++++++++--- 4 files changed, 42 insertions(+), 7 deletions(-) diff --git a/aiida/orm/implementation/general/calculation/__init__.py b/aiida/orm/implementation/general/calculation/__init__.py index f662171a82..c70c48ea10 100644 --- a/aiida/orm/implementation/general/calculation/__init__.py +++ b/aiida/orm/implementation/general/calculation/__init__.py @@ -83,6 +83,8 @@ class AbstractCalculation(SealableWithUpdatableAttributes): calculations run via a scheduler. """ + _cacheable = False + @classproperty def _hash_ignored_attributes(cls): return super(AbstractCalculation, cls)._hash_ignored_attributes + [ diff --git a/aiida/orm/implementation/general/calculation/inline.py b/aiida/orm/implementation/general/calculation/inline.py index a51bfb280c..a0f2cedc5d 100644 --- a/aiida/orm/implementation/general/calculation/inline.py +++ b/aiida/orm/implementation/general/calculation/inline.py @@ -21,6 +21,8 @@ class InlineCalculation(Calculation): for a simple calculation """ + _cacheable = True + def get_function_name(self): """ Get the function name. diff --git a/aiida/orm/implementation/general/calculation/job/__init__.py b/aiida/orm/implementation/general/calculation/job/__init__.py index 2e99154060..fa3b12afe5 100644 --- a/aiida/orm/implementation/general/calculation/job/__init__.py +++ b/aiida/orm/implementation/general/calculation/job/__init__.py @@ -36,6 +36,8 @@ class AbstractJobCalculation(object): remotely on a job scheduler. """ + _cacheable = True + @classproperty def _hash_ignored_attributes(cls): # _updatable_attributes are ignored automatically. 
diff --git a/aiida/orm/implementation/general/node.py b/aiida/orm/implementation/general/node.py index b92559df6f..394653a5ed 100644 --- a/aiida/orm/implementation/general/node.py +++ b/aiida/orm/implementation/general/node.py @@ -149,6 +149,9 @@ def __new__(cls, name, bases, attrs): # A list of attribute names that will be ignored when creating the hash. _hash_ignored_attributes = [] + # Flag that determines whether the class can be cached. + _cacheable = True + def get_desc(self): """ Returns a string with infos retrieved from a node's @@ -1573,13 +1576,7 @@ def store(self, with_transaction=True, use_cache=None): # Retrieve the cached node. same_node = self.get_same_node() if use_cache else None if same_node is not None: - new_node = same_node.copy(include_updatable_attrs=True) - inputlinks_cache = self._inputlinks_cache - self.__dict__ = new_node.__dict__ - self._inputlinks_cache = inputlinks_cache - # self._repo_folder = new_node._repo_folder - self.store(with_transaction=with_transaction, use_cache=False) - self.set_extra('cached_from', same_node.uuid) + self._store_from_cache(same_node, with_transaction=with_transaction) else: # call implementation-dependent store method self._db_store(with_transaction) @@ -1604,6 +1601,35 @@ def store(self, with_transaction=True, use_cache=None): # n = Node().store() return self + def _store_from_cache(self, cache_node, with_transaction): + new_node = cache_node.copy(include_updatable_attrs=True) + inputlinks_cache = self._inputlinks_cache + # "impersonate" the copied node by getting all its attributes + self.__dict__ = new_node.__dict__ + # restore the input links + self._inputlinks_cache = inputlinks_cache + + # Make sure the node doesn't have any RETURN links + if cache_node.get_outputs(link_type=LinkType.RETURN): + raise ValueError("Cannot use cache from nodes with RETURN links.") + + self.store(with_transaction=with_transaction, use_cache=False) + self.set_extra('cached_from', cache_node.uuid) + + # add CREATE links + output_mapping = {} + for out_node in cache_node.get_outputs(link_type=LinkType.CREATE): + new_node = out_node.copy(include_updatable_attrs=True).store() + output_mapping[out_node.pk] = new_node + for linkname, out_node in cache_node.get_outputs_dict(link_type=LinkType.CREATE).items(): + out_pk = out_node.pk + new_node = output_mapping[out_pk] + suffix = '_{}'.format(out_pk) + if linkname.endswith(suffix): + linkname.rsplit(suffix, 1)[0] + linkname += '_{}'.format(new_node.pk) + new_node.add_link_from(self, label=linkname, link_type=LinkType.CREATE) + @abstractmethod def _db_store(self, with_transaction=True): """ @@ -1667,6 +1693,9 @@ def rehash(self): self.set_extra('hash', self.get_hash()) def get_same_node(self): + if not self._cacheable: + return None + from aiida.orm.querybuilder import QueryBuilder hash_ = self.get_hash() From 2b72bff012fdbe69b09493ee546940851b5cc63f Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Mon, 6 Nov 2017 08:49:47 +0100 Subject: [PATCH 065/101] Seal inline calculations --- aiida/orm/implementation/django/calculation/inline.py | 1 + aiida/orm/implementation/general/calculation/inline.py | 3 +++ aiida/orm/implementation/sqlalchemy/calculation/inline.py | 1 + 3 files changed, 5 insertions(+) diff --git a/aiida/orm/implementation/django/calculation/inline.py b/aiida/orm/implementation/django/calculation/inline.py index 41feecc4bf..112ac0e63e 100644 --- a/aiida/orm/implementation/django/calculation/inline.py +++ b/aiida/orm/implementation/django/calculation/inline.py @@ -92,6 +92,7 @@ def 
wrapped_function(*args, **kwargs): for v in retval.itervalues(): v.store(with_transaction=False) + c.seal() # Return the calculation and the return values return c, retval diff --git a/aiida/orm/implementation/general/calculation/inline.py b/aiida/orm/implementation/general/calculation/inline.py index a0f2cedc5d..4344922b29 100644 --- a/aiida/orm/implementation/general/calculation/inline.py +++ b/aiida/orm/implementation/general/calculation/inline.py @@ -31,6 +31,9 @@ def get_function_name(self): """ return self.get_attr('function_name', None) + def has_finished_ok(self): + return self.is_sealed + def make_inline(func): """ diff --git a/aiida/orm/implementation/sqlalchemy/calculation/inline.py b/aiida/orm/implementation/sqlalchemy/calculation/inline.py index 3c24088421..940e69b8ca 100644 --- a/aiida/orm/implementation/sqlalchemy/calculation/inline.py +++ b/aiida/orm/implementation/sqlalchemy/calculation/inline.py @@ -95,6 +95,7 @@ def wrapped_function(*args, **kwargs): for v in retval.itervalues(): v.store(with_transaction=False) + c.seal() # Return the calculation and the return values return c, retval From 9c6f90abb021a8a6a4b60227420e753f61973cff Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Mon, 6 Nov 2017 08:58:24 +0100 Subject: [PATCH 066/101] Fix _store_from_cache --- aiida/orm/implementation/general/node.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/aiida/orm/implementation/general/node.py b/aiida/orm/implementation/general/node.py index 394653a5ed..8607ce9e67 100644 --- a/aiida/orm/implementation/general/node.py +++ b/aiida/orm/implementation/general/node.py @@ -1621,13 +1621,9 @@ def _store_from_cache(self, cache_node, with_transaction): for out_node in cache_node.get_outputs(link_type=LinkType.CREATE): new_node = out_node.copy(include_updatable_attrs=True).store() output_mapping[out_node.pk] = new_node - for linkname, out_node in cache_node.get_outputs_dict(link_type=LinkType.CREATE).items(): + for linkname, out_node in cache_node.get_outputs(also_labels=True, link_type=LinkType.CREATE): out_pk = out_node.pk new_node = output_mapping[out_pk] - suffix = '_{}'.format(out_pk) - if linkname.endswith(suffix): - linkname.rsplit(suffix, 1)[0] - linkname += '_{}'.format(new_node.pk) new_node.add_link_from(self, label=linkname, link_type=LinkType.CREATE) @abstractmethod @@ -1695,7 +1691,7 @@ def rehash(self): def get_same_node(self): if not self._cacheable: return None - + from aiida.orm.querybuilder import QueryBuilder hash_ = self.get_hash() From e4d101d1b07f5921b8e521a2752910b97c6ab0cd Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Mon, 6 Nov 2017 09:28:04 +0100 Subject: [PATCH 067/101] Simplify _store_from_cache code, add tests for InlineCalculation caching --- aiida/backends/tests/__init__.py | 1 + aiida/backends/tests/inline_calculation.py | 64 ++++++++++++++++++++++ aiida/orm/implementation/general/node.py | 6 +- 3 files changed, 66 insertions(+), 5 deletions(-) create mode 100644 aiida/backends/tests/inline_calculation.py diff --git a/aiida/backends/tests/__init__.py b/aiida/backends/tests/__init__.py index 6685d9dc59..9059211b4f 100644 --- a/aiida/backends/tests/__init__.py +++ b/aiida/backends/tests/__init__.py @@ -64,6 +64,7 @@ 'daemon': ['aiida.backends.tests.daemon'], 'verdi_commands': ['aiida.backends.tests.verdi_commands'], 'caching_config': ['aiida.backends.tests.test_caching_config'], + 'inline_calculation': ['aiida.backends.tests.inline_calculation'], } } diff --git a/aiida/backends/tests/inline_calculation.py 
b/aiida/backends/tests/inline_calculation.py new file mode 100644 index 0000000000..d21681a5a0 --- /dev/null +++ b/aiida/backends/tests/inline_calculation.py @@ -0,0 +1,64 @@ +# -*- coding: utf-8 -*- +########################################################################### +# Copyright (c), The AiiDA team. All rights reserved. # +# This file is part of the AiiDA code. # +# # +# The code is hosted on GitHub at https://github.com/aiidateam/aiida_core # +# For further information on the license, see the LICENSE.txt file # +# For further information please visit http://www.aiida.net # +########################################################################### +""" +Tests for inline calculations. +""" + +from aiida.orm.data.base import Int +from aiida.orm.calculation.inline import make_inline +from aiida.backends.testbase import AiidaTestCase + +class TestInlineCalculation(AiidaTestCase): + """ + Tests for the InlineCalculation calculations. + """ + def setUp(self): + @make_inline + def incr_inline(inp): + return {'res': Int(inp.value + 1)} + + self.incr_inline = incr_inline + + def test_incr(self): + """ + Simple test for the inline increment function. + """ + for i in [-4, 0, 3, 10]: + calc, res = self.incr_inline(inp=Int(i)) + self.assertEqual(res['res'].value, i + 1) + + def test_caching(self): + from aiida.common.caching import CONFIG, configure + configure() + use_cache_default = CONFIG['use_cache']['default'] + CONFIG['use_cache']['default'] = True + calc1, res1 = self.incr_inline(inp=Int(11)) + calc2, res2 = self.incr_inline(inp=Int(11)) + self.assertEquals(res1['res'].value, res2['res'].value, 12) + self.assertEquals(calc1.get_extra('cached_from', calc1.uuid), calc2.get_extra('cached_from')) + + CONFIG['use_cache']['default'] = use_cache_default + + def test_caching_change_code(self): + from aiida.common.caching import CONFIG, configure + configure() + use_cache_default = CONFIG['use_cache']['default'] + CONFIG['use_cache']['default'] = True + calc1, res1 = self.incr_inline(inp=Int(11)) + + @make_inline + def incr_inline(inp): + return {'res': Int(inp.value + 2)} + + calc2, res2 = incr_inline(inp=Int(11)) + self.assertNotEquals(res1['res'].value, res2['res'].value) + self.assertFalse('cached_from' in calc2.extras()) + + CONFIG['use_cache']['default'] = use_cache_default diff --git a/aiida/orm/implementation/general/node.py b/aiida/orm/implementation/general/node.py index 8607ce9e67..6e8ca7bf04 100644 --- a/aiida/orm/implementation/general/node.py +++ b/aiida/orm/implementation/general/node.py @@ -1618,12 +1618,8 @@ def _store_from_cache(self, cache_node, with_transaction): # add CREATE links output_mapping = {} - for out_node in cache_node.get_outputs(link_type=LinkType.CREATE): - new_node = out_node.copy(include_updatable_attrs=True).store() - output_mapping[out_node.pk] = new_node for linkname, out_node in cache_node.get_outputs(also_labels=True, link_type=LinkType.CREATE): - out_pk = out_node.pk - new_node = output_mapping[out_pk] + new_node = out_node.copy(include_updatable_attrs=True).store() new_node.add_link_from(self, label=linkname, link_type=LinkType.CREATE) @abstractmethod From d902d694b5f5297ae95ba535c65b4cbc89dde7a4 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Mon, 6 Nov 2017 09:40:28 +0100 Subject: [PATCH 068/101] Merge configuration for 'use_cache' and 'fast_forward'. 
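After this merge the per-profile configuration is a single flat section with 'default', 'enabled' and 'disabled' keys, where listing a class under 'enabled' or 'disabled' overrides the default. A sketch of the corresponding cache_config.yml, modelled on the reference configuration in the updated test below (the profile name is a placeholder):

    my_profile:
      default: true
      enabled: []
      disabled:
        - aiida.orm.calculation.job.simpleplugins.templatereplacer.TemplatereplacerCalculation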
--- aiida/backends/tests/test_caching_config.py | 21 +++----- aiida/common/caching.py | 58 +++++++++------------ 2 files changed, 31 insertions(+), 48 deletions(-) diff --git a/aiida/backends/tests/test_caching_config.py b/aiida/backends/tests/test_caching_config.py index 15087b9337..a87c72ffcc 100644 --- a/aiida/backends/tests/test_caching_config.py +++ b/aiida/backends/tests/test_caching_config.py @@ -13,7 +13,7 @@ import yaml from aiida.backends.utils import get_current_profile -from aiida.common.caching import configure, get_fast_forward_enabled, get_use_cache_default +from aiida.common.caching import configure, get_use_cache, get_use_cache_default class CacheConfigTest(unittest.TestCase): """ @@ -25,12 +25,9 @@ def setUp(self): """ self.config_reference = { get_current_profile(): { - 'use_cache': {'default': True}, - 'fast_forward': { - 'default': True, - 'enabled': [], - 'disabled': ['aiida.orm.calculation.job.simpleplugins.templatereplacer.TemplatereplacerCalculation'] - } + 'default': True, + 'enabled': [], + 'disabled': ['aiida.orm.calculation.job.simpleplugins.templatereplacer.TemplatereplacerCalculation'] } } with tempfile.NamedTemporaryFile() as tf, open(tf.name, 'w') as of: @@ -40,13 +37,9 @@ def setUp(self): def tearDown(self): configure() - def test_use_cache_default(self): + def test_default(self): self.assertTrue(get_use_cache_default()) - def test_fast_forward_default(self): - from aiida.orm.calculation.job import JobCalculation - self.assertTrue(get_fast_forward_enabled(JobCalculation)) - - def test_fast_forward_disabled(self): + def test_caching_enabled(self): from aiida.orm.calculation.job.simpleplugins.templatereplacer import TemplatereplacerCalculation - self.assertFalse(get_fast_forward_enabled(TemplatereplacerCalculation)) + self.assertFalse(get_use_cache(TemplatereplacerCalculation)) diff --git a/aiida/common/caching.py b/aiida/common/caching.py index 0fc869571d..1f7d5ff699 100644 --- a/aiida/common/caching.py +++ b/aiida/common/caching.py @@ -18,19 +18,16 @@ from aiida.common.setup import AIIDA_CONFIG_FOLDER from aiida.backends.utils import get_current_profile -__all__ = ['get_fast_forward_enabled', 'get_use_cache_default'] +__all__ = ['get_use_cache', 'get_use_cache_default'] config_keys = Enumerate(( - 'use_cache', 'fast_forward', 'default', 'enabled', 'disabled' + 'default', 'enabled', 'disabled' )) DEFAULT_CONFIG = { - config_keys.use_cache: {config_keys.default: False}, - config_keys.fast_forward: { - config_keys.default: False, - config_keys.enabled: [], - config_keys.disabled: [], - } + config_keys.default: False, + config_keys.enabled: [], + config_keys.disabled: [], } def _get_config(config_file): @@ -42,26 +39,20 @@ def _get_config(config_file): return DEFAULT_CONFIG # validate configuration - for key, value in config.items(): - error_msg = "Configuration error: Invalid key '{}' in cache_config.yml" + for key in config: if key not in DEFAULT_CONFIG: - raise ValueError(error_msg.format(key)) - for sub_key in value: - if sub_key not in DEFAULT_CONFIG[key]: - raise ValueError(error_msg.format(sub_key)) + raise ValueError( + "Configuration error: Invalid key '{}' in cache_config.yml".format(key) + ) # add defaults where config is missing for key, default_config in DEFAULT_CONFIG.items(): - config[key] = config.get(key, {}) - for sub_key, sub_default_config in default_config.items(): - config[key][sub_key] = config[key].get(sub_key, sub_default_config) + config[key] = config.get(key, default_config) # load classes try: - for config_part in config.values(): - 
for key in [config_keys.enabled, config_keys.disabled]: - if key in config_part: - config_part[key] = [load_class(c) for c in config_part[key]] + for key in [config_keys.enabled, config_keys.disabled]: + config[key] = [load_class(c) for c in config[key]] except (ImportError, ClassNotFoundException) as err: raise_from( ConfigurationError("Unknown class given in 'cache_config.yml': '{}'".format(err)), @@ -69,27 +60,26 @@ def _get_config(config_file): ) return config -CONFIG = {} +_CONFIG = {} def configure(config_file=os.path.join(os.path.expanduser(AIIDA_CONFIG_FOLDER), 'cache_config.yml')): """ - Reads the caching configuration file and sets the CONFIG variable. + Reads the caching configuration file and sets the _CONFIG variable. """ - global CONFIG - CONFIG.clear() - CONFIG.update(_get_config(config_file=config_file)) + global _CONFIG + _CONFIG.clear() + _CONFIG.update(_get_config(config_file=config_file)) def get_use_cache_default(): - if not CONFIG: + if not _CONFIG: configure() - return CONFIG[config_keys.use_cache][config_keys.default] + return _CONFIG[config_keys.default] -def get_fast_forward_enabled(calc_class): - if not CONFIG: +def get_use_cache(calc_class): + if not _CONFIG: configure() - fast_forward_config = CONFIG[config_keys.fast_forward] - enabled = calc_class in fast_forward_config[config_keys.enabled] - disabled = calc_class in fast_forward_config[config_keys.disabled] + enabled = calc_class in _CONFIG[config_keys.enabled] + disabled = calc_class in _CONFIG[config_keys.disabled] if enabled and disabled: raise ValueError('Invalid configuration: Fast-forwarding for {} is both enabled and disabled.'.format(calc_class)) elif enabled: @@ -97,4 +87,4 @@ def get_fast_forward_enabled(calc_class): elif disabled: return False else: - return fast_forward_config[config_keys.default] + return get_use_cache_default() From 644d3497cf54c141b1ed41c5f33a1e84acedf346 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Mon, 6 Nov 2017 10:04:37 +0100 Subject: [PATCH 069/101] Add enable_caching and disable_caching contextmanagers. 
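The context managers temporarily override the caching configuration and restore it when the block is left. A minimal usage sketch (assuming a loaded AiiDA profile; the classes are just examples):

    from aiida.common.caching import enable_caching, disable_caching
    from aiida.orm.calculation.inline import InlineCalculation
    from aiida.orm.data.base import Float

    with enable_caching(InlineCalculation):
        pass  # inline calculations run here may be reused from the cache

    with disable_caching(Float):
        pass  # Float nodes stored here are never taken from the cache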
--- aiida/backends/tests/inline_calculation.py | 39 +++++-------- aiida/backends/tests/test_caching_config.py | 16 +++++- aiida/common/caching.py | 62 +++++++++++++++------ aiida/orm/implementation/general/node.py | 4 +- 4 files changed, 74 insertions(+), 47 deletions(-) diff --git a/aiida/backends/tests/inline_calculation.py b/aiida/backends/tests/inline_calculation.py index d21681a5a0..a62beb916c 100644 --- a/aiida/backends/tests/inline_calculation.py +++ b/aiida/backends/tests/inline_calculation.py @@ -12,7 +12,8 @@ """ from aiida.orm.data.base import Int -from aiida.orm.calculation.inline import make_inline +from aiida.common.caching import enable_caching +from aiida.orm.calculation.inline import make_inline, InlineCalculation from aiida.backends.testbase import AiidaTestCase class TestInlineCalculation(AiidaTestCase): @@ -35,30 +36,20 @@ def test_incr(self): self.assertEqual(res['res'].value, i + 1) def test_caching(self): - from aiida.common.caching import CONFIG, configure - configure() - use_cache_default = CONFIG['use_cache']['default'] - CONFIG['use_cache']['default'] = True - calc1, res1 = self.incr_inline(inp=Int(11)) - calc2, res2 = self.incr_inline(inp=Int(11)) - self.assertEquals(res1['res'].value, res2['res'].value, 12) - self.assertEquals(calc1.get_extra('cached_from', calc1.uuid), calc2.get_extra('cached_from')) - - CONFIG['use_cache']['default'] = use_cache_default + with enable_caching(InlineCalculation): + calc1, res1 = self.incr_inline(inp=Int(11)) + calc2, res2 = self.incr_inline(inp=Int(11)) + self.assertEquals(res1['res'].value, res2['res'].value, 12) + self.assertEquals(calc1.get_extra('cached_from', calc1.uuid), calc2.get_extra('cached_from')) def test_caching_change_code(self): - from aiida.common.caching import CONFIG, configure - configure() - use_cache_default = CONFIG['use_cache']['default'] - CONFIG['use_cache']['default'] = True - calc1, res1 = self.incr_inline(inp=Int(11)) - - @make_inline - def incr_inline(inp): - return {'res': Int(inp.value + 2)} + with enable_caching(InlineCalculation): + calc1, res1 = self.incr_inline(inp=Int(11)) - calc2, res2 = incr_inline(inp=Int(11)) - self.assertNotEquals(res1['res'].value, res2['res'].value) - self.assertFalse('cached_from' in calc2.extras()) + @make_inline + def incr_inline(inp): + return {'res': Int(inp.value + 2)} - CONFIG['use_cache']['default'] = use_cache_default + calc2, res2 = incr_inline(inp=Int(11)) + self.assertNotEquals(res1['res'].value, res2['res'].value) + self.assertFalse('cached_from' in calc2.extras()) diff --git a/aiida/backends/tests/test_caching_config.py b/aiida/backends/tests/test_caching_config.py index a87c72ffcc..1fe3f35e3f 100644 --- a/aiida/backends/tests/test_caching_config.py +++ b/aiida/backends/tests/test_caching_config.py @@ -13,7 +13,8 @@ import yaml from aiida.backends.utils import get_current_profile -from aiida.common.caching import configure, get_use_cache, get_use_cache_default +from aiida.common.caching import configure, get_use_cache, enable_caching, disable_caching +from aiida.orm.calculation.job.simpleplugins.templatereplacer import TemplatereplacerCalculation class CacheConfigTest(unittest.TestCase): """ @@ -38,8 +39,17 @@ def tearDown(self): configure() def test_default(self): - self.assertTrue(get_use_cache_default()) + self.assertTrue(get_use_cache()) def test_caching_enabled(self): - from aiida.orm.calculation.job.simpleplugins.templatereplacer import TemplatereplacerCalculation self.assertFalse(get_use_cache(TemplatereplacerCalculation)) + + def 
test_invalid_config(self): + with enable_caching(TemplatereplacerCalculation): + self.assertRaises(ValueError, get_use_cache, TemplatereplacerCalculation) + + def test_disable_caching(self): + from aiida.orm.data.base import Float + with disable_caching(Float): + self.assertFalse(get_use_cache(Float)) + self.assertTrue(get_use_cache(Float)) diff --git a/aiida/common/caching.py b/aiida/common/caching.py index 1f7d5ff699..831c315684 100644 --- a/aiida/common/caching.py +++ b/aiida/common/caching.py @@ -2,12 +2,15 @@ # -*- coding: utf-8 -*- import os -import yaml +import copy +from functools import wraps +from contextlib import contextmanager try: from collections import ChainMap except ImportError: from chainmap import ChainMap +import yaml from plum.util import load_class from future.utils import raise_from from plum.exceptions import ClassNotFoundException @@ -18,7 +21,7 @@ from aiida.common.setup import AIIDA_CONFIG_FOLDER from aiida.backends.utils import get_current_profile -__all__ = ['get_use_cache', 'get_use_cache_default'] +__all__ = ['get_use_cache', 'enable_caching', 'disable_caching'] config_keys = Enumerate(( 'default', 'enabled', 'disabled' @@ -70,21 +73,44 @@ def configure(config_file=os.path.join(os.path.expanduser(AIIDA_CONFIG_FOLDER), _CONFIG.clear() _CONFIG.update(_get_config(config_file=config_file)) -def get_use_cache_default(): - if not _CONFIG: - configure() +def _with_config(func): + @wraps(func) + def inner(*args, **kwargs): + if not _CONFIG: + configure() + return func(*args, **kwargs) + return inner + +@_with_config +def get_use_cache(node_class=None): + if node_class is not None: + enabled = node_class in _CONFIG[config_keys.enabled] + disabled = node_class in _CONFIG[config_keys.disabled] + if enabled and disabled: + raise ValueError('Invalid configuration: Fast-forwarding for {} is both enabled and disabled.'.format(node_class)) + elif enabled: + return True + elif disabled: + return False return _CONFIG[config_keys.default] -def get_use_cache(calc_class): - if not _CONFIG: - configure() - enabled = calc_class in _CONFIG[config_keys.enabled] - disabled = calc_class in _CONFIG[config_keys.disabled] - if enabled and disabled: - raise ValueError('Invalid configuration: Fast-forwarding for {} is both enabled and disabled.'.format(calc_class)) - elif enabled: - return True - elif disabled: - return False - else: - return get_use_cache_default() +@contextmanager +@_with_config +def _reset_config(): + global _CONFIG + config_copy = copy.deepcopy(_CONFIG) + yield + _CONFIG.clear() + _CONFIG.update(config_copy) + +@contextmanager +def enable_caching(node_class): + with _reset_config(): + _CONFIG[config_keys.enabled].append(node_class) + yield + +@contextmanager +def disable_caching(node_class): + with _reset_config(): + _CONFIG[config_keys.disabled].append(node_class) + yield diff --git a/aiida/orm/implementation/general/node.py b/aiida/orm/implementation/general/node.py index 6e8ca7bf04..1b7482187e 100644 --- a/aiida/orm/implementation/general/node.py +++ b/aiida/orm/implementation/general/node.py @@ -24,7 +24,7 @@ from aiida.common.folders import SandboxFolder from aiida.common.utils import abstractclassmethod from aiida.common.utils import combomethod -from aiida.common.caching import get_use_cache_default +from aiida.common.caching import get_use_cache from aiida.common.links import LinkType from aiida.common.lang import override from aiida.common.old_pluginloader import get_query_type_string @@ -1572,7 +1572,7 @@ def store(self, with_transaction=True, 
use_cache=None):
         # Get default for use_cache if it's not set explicitly.
         if use_cache is None:
-            use_cache = get_use_cache_default()
+            use_cache = get_use_cache(type(self))
         # Retrieve the cached node.
         same_node = self.get_same_node() if use_cache else None
         if same_node is not None:

From 389aa8d028e087be56df0a0d6fb31b828d1c620f Mon Sep 17 00:00:00 2001
From: Dominik Gresch
Date: Mon, 6 Nov 2017 10:16:51 +0100
Subject: [PATCH 070/101] Merge duplicate 'general' implementations of
 InlineCalculation. Add 'None' default to enable / disable caching, which sets
 the default.

--- aiida/common/caching.py | 14 ++++++++++----
 aiida/orm/calculation/inline.py | 18 +-----------------
 .../general/calculation/inline.py | 8 ++++++++
 3 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/aiida/common/caching.py b/aiida/common/caching.py
index 831c315684..b0cec16e60 100644
--- a/aiida/common/caching.py
+++ b/aiida/common/caching.py
@@ -104,13 +104,19 @@ def _reset_config():
     _CONFIG.update(config_copy)

 @contextmanager
-def enable_caching(node_class):
+def enable_caching(node_class=None):
     with _reset_config():
-        _CONFIG[config_keys.enabled].append(node_class)
+        if node_class is None:
+            _CONFIG[config_keys.default] = True
+        else:
+            _CONFIG[config_keys.enabled].append(node_class)
         yield

 @contextmanager
-def disable_caching(node_class):
+def disable_caching(node_class=None):
     with _reset_config():
-        _CONFIG[config_keys.disabled].append(node_class)
+        if node_class is None:
+            _CONFIG[config_keys.default] = False
+        else:
+            _CONFIG[config_keys.disabled].append(node_class)
         yield

diff --git a/aiida/orm/calculation/inline.py b/aiida/orm/calculation/inline.py
index 5749c0bc9b..1869e3d602 100644
--- a/aiida/orm/calculation/inline.py
+++ b/aiida/orm/calculation/inline.py
@@ -9,21 +9,7 @@
 ###########################################################################

-from aiida.orm.implementation.calculation import InlineCalculation as _IC, \
-    make_inline
-
-
-class InlineCalculation(_IC):
-    """
-    Here I put all the attributes/method that are common to all backends
-    """
-    def get_desc(self):
-        """
-        Returns a string with infos retrieved from a InlineCalculation node's
-        properties.
-        :return: description string
-        """
-        return "{}()".format(self.get_function_name())
+from aiida.orm.implementation.calculation import InlineCalculation, make_inline

 def optional_inline(func):
     """
@@ -63,5 +49,3 @@ def wrapped_function(*args, **kwargs):
             return func(*args, **kwargs)

     return wrapped_function
-
-
diff --git a/aiida/orm/implementation/general/calculation/inline.py b/aiida/orm/implementation/general/calculation/inline.py
index 4344922b29..2a7dc0c181 100644
--- a/aiida/orm/implementation/general/calculation/inline.py
+++ b/aiida/orm/implementation/general/calculation/inline.py
@@ -23,6 +23,14 @@ class InlineCalculation(Calculation):

     _cacheable = True

+    def get_desc(self):
+        """
+        Returns a string with info retrieved from an InlineCalculation node's
+        properties.
+        :return: description string
+        """
+        return "{}()".format(self.get_function_name())
+
     def get_function_name(self):
         """
         Get the function name.
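Called without a class, the context managers now toggle the global default instead. A minimal sketch (assuming a loaded AiiDA profile):

    from aiida.common.caching import enable_caching, disable_caching, get_use_cache

    with enable_caching():
        assert get_use_cache() is True   # global default switched on

    with disable_caching():
        assert get_use_cache() is False  # global default switched off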
From e4af12d3c774d9cb990a3bdd222694aed5254a0a Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Mon, 6 Nov 2017 10:24:12 +0100 Subject: [PATCH 071/101] Remove fast-forwarding tests for WorkChain and workfunction --- aiida/backends/tests/work/workChain.py | 73 ----------------------- aiida/backends/tests/work/workfunction.py | 26 -------- aiida/work/process.py | 10 ++-- 3 files changed, 5 insertions(+), 104 deletions(-) diff --git a/aiida/backends/tests/work/workChain.py b/aiida/backends/tests/work/workChain.py index 9758acb8e0..d4327a9072 100644 --- a/aiida/backends/tests/work/workChain.py +++ b/aiida/backends/tests/work/workChain.py @@ -385,79 +385,6 @@ def test_hash(self): self.assertEquals(wc.get_hash(), self.reference_wc.get_hash()) self.assertNotEquals(wc.get_hash(), None) - def test_fastforwarding(self): - res, pid = run( - self.wf_class, - a=Int(1), b=Int(2), - _fast_forward=True, _return_pid=True - ) - self.assertEquals( - self.reference_wc.uuid, - load_node(pid).get_extra('cached_from') - ) - self.assertEquals(res, self.reference_result) - - def test_fastforwarding_notexecuted(self): - res, pid = run( - self.wf_class_broken, - a=Int(1), b=Int(2), - _fast_forward=True, _return_pid=True - ) - self.assertEquals( - self.reference_wc.uuid, - load_node(pid).get_extra('cached_from') - ) - self.assertEquals(res, self.reference_result) - - def test_fastforwarding_notexecuted_testworks(self): - self.assertRaises( - ValueError, - run, - self.wf_class_broken, - a=Int(1), b=Int(2), - _fast_forward=False, _return_pid=True - ) - - def test_fastforwarding_twice(self): - res1, pid1 = run( - self.wf_class, - a=Int(1), b=Int(2), - _fast_forward=True, _return_pid=True - ) - res2, pid2 = run( - self.wf_class, - a=Int(1), b=Int(2), - _fast_forward=True, _return_pid=True - ) - wc1 = load_node(pid1) - wc2 = load_node(pid2) - self.assertEquals( - wc1.get_extra('cached_from', wc1.uuid), - wc2.get_extra('cached_from', wc2.uuid) - ) - self.assertEquals(res1, res2) - - def test_fastforwarding_default(self): - res, pid = run( - self.wf_class, - a=Int(1), - _fast_forward=True, _return_pid=True - ) - self.assertEquals( - load_node(pid).get_extra('cached_from'), - self.reference_wc.uuid - ) - self.assertEquals(res, self.reference_result) - - def test_fastforwarding_different(self): - res, pid = run( - self.wf_class, - a=Int(2), b=Int(1), - _fast_forward=True, _return_pid=True - ) - self.assertFalse('cached_from' in load_node(pid).extras()) - self.assertNotEquals(res, self.reference_result) - class TestWorkchainWithOldWorkflows(AiidaTestCase): diff --git a/aiida/backends/tests/work/workfunction.py b/aiida/backends/tests/work/workfunction.py index fe5f8429b4..592dc6583c 100644 --- a/aiida/backends/tests/work/workfunction.py +++ b/aiida/backends/tests/work/workfunction.py @@ -66,32 +66,6 @@ def test_hashes_different(self): w2 = load_node(pid2) self.assertNotEqual(w1.get_hash(), w2.get_hash()) - def test_caching(self): - # Creating a new workfunction to avoid getting other results. - @workfunction - def simple_cached_wf(inp): - return {'result': inp} - - r, pid = run(simple_cached_wf, inp=Int(2), _return_pid=True) - r2, pid2 = run(simple_cached_wf, inp=Int(2), _return_pid=True, _fast_forward=True) - self.assertEqual(r, r2) - self.assertEqual(pid, pid2) - self._check_hash_consistent(pid) - self._check_hash_consistent(pid2) - - def test_caching_different(self): - # Creating a new workfunction to avoid getting other results. 
- @workfunction - def simple_cached_wf(inp): - return {'result': inp} - - r, pid = run(simple_cached_wf, inp=Int(2), _return_pid=True) - r2, pid2 = run(simple_cached_wf, inp=Int(3), _return_pid=True, _fast_forward=True) - self.assertNotEqual(r, r2) - self.assertNotEqual(pid, pid2) - self._check_hash_consistent(pid) - self._check_hash_consistent(pid2) - def _check_hash_consistent(self, pid): wc = load_node(pid) self.assertEqual(wc.get_hash(), wc.get_extra('hash')) diff --git a/aiida/work/process.py b/aiida/work/process.py index 405ce27b6f..ed488e4b78 100644 --- a/aiida/work/process.py +++ b/aiida/work/process.py @@ -331,7 +331,7 @@ def _create_and_setup_db_record(self): self._calc = self.create_db_record() self._setup_db_record() if self.inputs._store_provenance: - self.calc.store_all(use_cache=self._fast_forward_enabled()) + self.calc.store_all(use_cache=self._use_cache_enabled()) if self.calc.has_finished_ok(): self._state = plum.process.ProcessState.FINISHED for name, value in self.calc.get_outputs_dict(link_type=LinkType.RETURN).items(): @@ -408,15 +408,15 @@ def _add_description_and_label(self): if label is not None: self._calc.label = label - def _fast_forward_enabled(self): + def _use_cache_enabled(self): # First priority: inputs try: - return self._parsed_inputs['_fast_forward'] + return self._parsed_inputs['_use_cache'] # Second priority: config except KeyError: return ( - caching.get_fast_forward_enabled(type(self)) or - caching.get_fast_forward_enabled(type(self._calc)) + caching.get_use_cache(type(self)) or + caching.get_use_cache(type(self._calc)) ) class FunctionProcess(Process): From 01a9143ada27998bd644019dda3e24d1ba541ff0 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Mon, 6 Nov 2017 11:16:34 +0100 Subject: [PATCH 072/101] Set calculation state to PARSING before adding cached outputs, and match cached state after --- .../orm/implementation/general/calculation/job/__init__.py | 7 +++++++ aiida/orm/implementation/general/node.py | 2 ++ 2 files changed, 9 insertions(+) diff --git a/aiida/orm/implementation/general/calculation/job/__init__.py b/aiida/orm/implementation/general/calculation/job/__init__.py index fa3b12afe5..2417ec7315 100644 --- a/aiida/orm/implementation/general/calculation/job/__init__.py +++ b/aiida/orm/implementation/general/calculation/job/__init__.py @@ -131,6 +131,13 @@ def store(self, *args, **kwargs): # c = Calculation().store() return self + def _add_outputs_from_cache(self, cache_node): + self._set_state(calc_states.PARSING) + super(AbstractJobCalculation, self)._add_outputs_from_cache( + cache_node=cache_node + ) + self._set_state(cache_node.get_state()) + def _validate(self): """ Verify if all the input nodes are present and valid. 
diff --git a/aiida/orm/implementation/general/node.py b/aiida/orm/implementation/general/node.py index 1b7482187e..26c087bca9 100644 --- a/aiida/orm/implementation/general/node.py +++ b/aiida/orm/implementation/general/node.py @@ -1577,6 +1577,7 @@ def store(self, with_transaction=True, use_cache=None): same_node = self.get_same_node() if use_cache else None if same_node is not None: self._store_from_cache(same_node, with_transaction=with_transaction) + self._add_outputs_from_cache(same_node) else: # call implementation-dependent store method self._db_store(with_transaction) @@ -1616,6 +1617,7 @@ def _store_from_cache(self, cache_node, with_transaction): self.store(with_transaction=with_transaction, use_cache=False) self.set_extra('cached_from', cache_node.uuid) + def _add_outputs_from_cache(self, cache_node): # add CREATE links output_mapping = {} for linkname, out_node in cache_node.get_outputs(also_labels=True, link_type=LinkType.CREATE): From 338f952439f87c00e60e3358674e3d7fcf0cb532 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Mon, 6 Nov 2017 18:01:01 +0100 Subject: [PATCH 073/101] Add caching documentation --- aiida/backends/tests/work/workChain.py | 2 +- .../general/calculation/__init__.py | 3 + aiida/orm/implementation/general/node.py | 3 + docs/source/caching/index.rst | 78 +++++++++++++++++++ docs/source/conf.py | 2 +- docs/source/user_guide/index.rst | 3 +- 6 files changed, 88 insertions(+), 3 deletions(-) create mode 100644 docs/source/caching/index.rst diff --git a/aiida/backends/tests/work/workChain.py b/aiida/backends/tests/work/workChain.py index d4327a9072..373913d5ea 100644 --- a/aiida/backends/tests/work/workChain.py +++ b/aiida/backends/tests/work/workChain.py @@ -379,7 +379,7 @@ def test_hash(self): res, pid = run( self.wf_class, a=Int(1), b=Int(2), - _fast_forward=True, _return_pid=True + _use_cache=True, _return_pid=True ) wc = load_node(pid) self.assertEquals(wc.get_hash(), self.reference_wc.get_hash()) diff --git a/aiida/orm/implementation/general/calculation/__init__.py b/aiida/orm/implementation/general/calculation/__init__.py index c70c48ea10..8104a1cec9 100644 --- a/aiida/orm/implementation/general/calculation/__init__.py +++ b/aiida/orm/implementation/general/calculation/__init__.py @@ -346,6 +346,9 @@ def _is_valid_cache(self): return super(AbstractCalculation, self)._is_valid_cache() and self.has_finished_ok() def _get_objects_to_hash(self): + """ + Return a list of objects which should be included in the hash. + """ res = super(AbstractCalculation, self)._get_objects_to_hash() res.append({ key: value.get_hash() diff --git a/aiida/orm/implementation/general/node.py b/aiida/orm/implementation/general/node.py index 26c087bca9..d62cac764b 100644 --- a/aiida/orm/implementation/general/node.py +++ b/aiida/orm/implementation/general/node.py @@ -1669,6 +1669,9 @@ def get_hash(self, ignore_errors=True, **kwargs): raise e def _get_objects_to_hash(self): + """ + Return a list of objects which should be included in the hash. + """ return [ importlib.import_module( self.__module__.split('.', 1)[0] diff --git a/docs/source/caching/index.rst b/docs/source/caching/index.rst new file mode 100644 index 0000000000..01f13b33b0 --- /dev/null +++ b/docs/source/caching/index.rst @@ -0,0 +1,78 @@ +.. _caching: + +Caching +======= + +When working with AiiDA, you might sometimes re-run calculations which were already successfully executed. 
Because this can waste a lot of computational resources, you can enable AiiDA to **cache** calculations, which means that it will re-use existing calculations if a calculation with the same inputs is submitted again.
+
+When a calculation is cached, a copy of the original calculation is created. This copy will keep the input links of the new calculation. The outputs of the original calculation are also copied, and linked to the new calculation. This allows for the new calculation to be a separate Node in the provenance graph and, critically, preserves the acyclicity of the graph.
+
+Caching is also implemented for Data nodes. This is not very useful in practice (yet), but is an easy way to show how the caching mechanism works:
+
+.. ipython::
+    :verbatim:
+
+    In [1]: from __future__ import print_function
+
+    In [2]: from aiida.orm.data.base import Str
+
+    In [3]: n1 = Str('test string')
+
+    In [4]: n1.store()
+    Out[4]: u'test string'
+
+    In [5]: n2 = Str('test string')
+
+    In [6]: n2.store(use_cache=True)
+    Out[6]: u'test string'
+
+    In [7]: print('UUID of n1:', n1.uuid)
+    UUID of n1: 956109e1-4382-4240-a711-2a4f3b522122
+
+    In [8]: print('n2 is cached from:', n2.get_extra('cached_from'))
+    n2 is cached from: 956109e1-4382-4240-a711-2a4f3b522122
+
+As you can see, passing ``use_cache=True`` to the ``store`` method enables using the cache. The fact that ``n2`` was created from ``n1`` is stored in the ``cached_from`` extra of ``n2``.
+
+When running a ``JobCalculation`` through the ``Process`` interface, you cannot directly set the ``use_cache`` flag the calculation node is stored internally. Instead, you can pass the ``_use_cache`` flag to the ``run`` or ``submit`` method.
+
+Caching is **not** implemented for workchains and workfunctions. Unlike calculations, they can not only create new data nodes, but only return existing ones. When copying a cached workchain, it's not clear which node should be returned without actually running the workchain.
+
+Configuration
+-------------
+
+Of course, using caching would be quite tedious if you had to set ``use_cache`` manually everywhere. To fix this, the default for ``use_cache`` can be set in the ``.aiida/cache_config.yml`` file. You can specify a global default, or enable / disable caching for specific calculation or data classes. An example configuration file might look like this:
+
+.. code:: yaml
+
+    profile-name:
+        default: False
+        enabled:
+            - aiida.orm.calculation.job.simpleplugins.templatereplacer.TemplatereplacerCalculation
+            - aiida.orm.data.base.Str
+        disabled:
+            - aiida.orm.data.base.Float
+
+This means that caching is enabled for ``TemplatereplacerCalculation`` and ``Str``, and disabled for all other classes. In this example, manually disabling ``aiida.orm.data.base.Float`` is actually not needed, since the default value for all classes is already ``False``. Note also that the fully qualified class import name (e.g., ``aiida.orm.data.base.Str``) must be given, not just the class name (``Str``). This is to avoid accidentally matching classes with the same name.
+
+How are cached nodes matched?
+-----------------------------
+
+To determine whether a given node is identical to an existing one, a hash of the content of the node is created. If a node of the same class with the same hash already exists in the database, this is considered a cache match. You can manually check the hash of a given node with the :meth:`.get_hash() <.AbstractNode.get_hash>` method.
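+
+For instance, a cache match can be checked by hand. A minimal sketch, reusing the ``Str`` nodes from the session above (assumes a loaded profile):
+
+.. code:: python
+
+    n3 = Str('test string')
+    n3.store()
+    assert n3.get_hash() == n1.get_hash()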
+ +By default, this hash is created from: + +* all attributes of a node, except the ``_updatable_attributes`` +* the ``__version__`` of the module which defines the node class +* the content of the repository folder of the node + +In the case of calculations, the hashes of the inputs are also included. When developing calculation and data classes, there are some methods you can use to determine how the hash is created: + +* To ignore specific attributes, a ``Node`` subclass can have a ``_hash_ignored_attributes`` attribute. This is a list of attribute names which are ignored when creating the hash. +* To add things which should be considered in the hash, you can override the :meth:`_get_objects_to_hash <.AbstractNode._get_objects_to_hash>` method. Note that doing so overrides the behavior described above, so you should make sure to use the ``super()`` method. +* Pass a keyword argument to :meth:`.get_hash <.AbstractNode.get_hash>`. These are passed on to ``aiida.common.hashing.make_hash``. For example, the ``ignored_folder_content`` keyword is used by the :class:`JobCalculation <.AbstractJobCalculation>` to ignore the ``raw_input`` subfolder of its repository folder. + +Additionally, there are two methods you can use to disable caching for particular nodes: + +* The :meth:`._is_valid_cache` method determines whether a particular node can be used as a cache. This is used for example to disable caching from failed calculations. +* Node classes have a ``_cacheable`` attribute, which can be set to ``False`` to completely switch off caching for nodes of that class. This avoids performing queries for the hash altogether. diff --git a/docs/source/conf.py b/docs/source/conf.py index f60cd7fdf5..4b2c05469b 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -43,7 +43,7 @@ # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.todo', 'sphinx.ext.coverage', 'sphinx.ext.imgmath', 'sphinx.ext.ifconfig', 'sphinx.ext.viewcode'] +extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.todo', 'sphinx.ext.coverage', 'sphinx.ext.imgmath', 'sphinx.ext.ifconfig', 'sphinx.ext.viewcode', 'IPython.sphinxext.ipython_console_highlighting', 'IPython.sphinxext.ipython_directive'] todo_include_todos = True diff --git a/docs/source/user_guide/index.rst b/docs/source/user_guide/index.rst index b2c8857f69..79ace8d9e7 100644 --- a/docs/source/user_guide/index.rst +++ b/docs/source/user_guide/index.rst @@ -8,5 +8,6 @@ User's guide ../get_started/index ../working_with_aiida/index ../tutorial/index + ../caching/index ../import_export/index - ../restapi/index \ No newline at end of file + ../restapi/index From 62ba9d9beac82f45adbb72309c287be46b15ccf7 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Mon, 6 Nov 2017 18:02:28 +0100 Subject: [PATCH 074/101] Update singledispatch requirement to 3.4.0.3 --- setup_requirements.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup_requirements.py b/setup_requirements.py index 6e6fc311e3..cf86cd4366 100644 --- a/setup_requirements.py +++ b/setup_requirements.py @@ -69,7 +69,7 @@ extras_require = { # Requirements for Python 2 only - ':python_version < "3"': ['chainmap', 'pathlib2', 'singledispatch >= 3.4.0.0'], + ':python_version < "3"': ['chainmap', 'pathlib2', 'singledispatch >= 3.4.0.3'], # Requirements for ssh transport with authentification through Kerberos # token # N. 
B.: you need to install first libffi and MIT kerberos GSSAPI including header files. From 20691dd274185327e92f90329b06af27f0b26787 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Mon, 13 Nov 2017 12:16:37 +0100 Subject: [PATCH 075/101] Add cache tests to test_daemon.py --- .travis-data/test_daemon.py | 101 +++++++++++++++++++++++------------- 1 file changed, 64 insertions(+), 37 deletions(-) diff --git a/.travis-data/test_daemon.py b/.travis-data/test_daemon.py index c96f8f665b..ead12a6375 100644 --- a/.travis-data/test_daemon.py +++ b/.travis-data/test_daemon.py @@ -98,6 +98,55 @@ def validate_workchains(expected_results): return valid +def create_calculation(code, counter, inputval, use_cache=False): + parameters = ParameterData(dict={'value': inputval}) + template = ParameterData(dict={ + ## The following line adds a significant sleep time. + ## I set it to 1 second to speed up tests + ## I keep it to a non-zero value because I want + ## To test the case when AiiDA finds some calcs + ## in a queued state + #'cmdline_params': ["{}".format(counter % 3)], # Sleep time + 'cmdline_params': ["1"], + 'input_file_template': "{value}", # File just contains the value to double + 'input_file_name': 'value_to_double.txt', + 'output_file_name': 'output.txt', + 'retrieve_temporary_files': ['triple_value.tmp'] + }) + calc = code.new_calc() + calc.set_max_wallclock_seconds(5 * 60) # 5 min + calc.set_resources({"num_machines": 1}) + calc.set_withmpi(False) + calc.set_parser_name('simpleplugins.templatereplacer.test.doubler') + + calc.use_parameters(parameters) + calc.use_template(template) + calc.store_all(use_cache=use_cache) + expected_result = { + 'value': 2 * inputval, + 'retrieved_temporary_files': { + 'triple_value.tmp': str(inputval * 3) + } + } + print "[{}] created calculation {}, pk={}".format( + counter, calc.uuid, calc.dbnode.pk) + return calc, expected_result + +def submit_calculation(code, counter, inputval): + calc, expected_result = create_calculation( + code=code, counter=counter, inputval=inputval + ) + calc.submit() + print "[{}] calculation submitted.".format(counter) + return calc, expected_result + +def create_cache_calc(code, counter, inputval): + calc, expected_result = create_calculation( + code=code, counter=counter, inputval=inputval, use_cache=True + ) + print "[{}] created cached calculation.".format(counter) + return calc, expected_result + def main(): # Submitting the Calculations @@ -106,39 +155,10 @@ def main(): expected_results_calculations = {} for counter in range(1, number_calculations + 1): inputval = counter - parameters = ParameterData(dict={'value': inputval}) - template = ParameterData(dict={ - ## The following line adds a significant sleep time. 
- ## I set it to 1 second to speed up tests - ## I keep it to a non-zero value because I want - ## To test the case when AiiDA finds some calcs - ## in a queued state - #'cmdline_params': ["{}".format(counter % 3)], # Sleep time - 'cmdline_params': ["1"], - 'input_file_template': "{value}", # File just contains the value to double - 'input_file_name': 'value_to_double.txt', - 'output_file_name': 'output.txt', - 'retrieve_temporary_files': ['triple_value.tmp'] - }) - calc = code.new_calc() - calc.set_max_wallclock_seconds(5 * 60) # 5 min - calc.set_resources({"num_machines": 1}) - calc.set_withmpi(False) - calc.set_parser_name('simpleplugins.templatereplacer.test.doubler') - - calc.use_parameters(parameters) - calc.use_template(template) - calc.store_all() - print "[{}] created calculation {}, pk={}".format( - counter, calc.uuid, calc.dbnode.pk) - expected_results_calculations[calc.pk] = { - 'value': inputval * 2, - 'retrieved_temporary_files': { - 'triple_value.tmp': str(inputval * 3) - } - } - calc.submit() - print "[{}] calculation submitted.".format(counter) + calc, expected_result = submit_calculation( + code=code, counter=counter, inputval=inputval + ) + expected_results_calculations[calc.pk] = expected_result # Submitting the Workchains print "Submitting {} workchains to the daemon".format(number_workchains) @@ -158,7 +178,7 @@ def main(): exited_with_timeout = True while time.time() - start_time < timeout_secs: time.sleep(15) # Wait a few seconds - + # Print some debug info, both for debugging reasons and to avoid # that the test machine is shut down because there is no output @@ -168,7 +188,7 @@ def main(): print "Output of 'verdi calculation list -a':" try: print subprocess.check_output( - ["verdi", "calculation", "list", "-a"], + ["verdi", "calculation", "list", "-a"], stderr=subprocess.STDOUT, ) except subprocess.CalledProcessError as e: @@ -177,7 +197,7 @@ def main(): print "Output of 'verdi work list':" try: print subprocess.check_output( - ['verdi', 'work', 'list'], + ['verdi', 'work', 'list'], stderr=subprocess.STDOUT, ) except subprocess.CalledProcessError as e: @@ -186,7 +206,7 @@ def main(): print "Output of 'verdi daemon status':" try: print subprocess.check_output( - ["verdi", "daemon", "status"], + ["verdi", "daemon", "status"], stderr=subprocess.STDOUT, ) except subprocess.CalledProcessError as e: @@ -204,6 +224,13 @@ def main(): timeout_secs) sys.exit(2) else: + # create cached calculations -- these should be FINISHED immediately + for counter in range(1, number_calculations + 1): + inputval = counter + calc, expected_result = create_cache_calc( + code=code, counter=counter, inputval=inputval + ) + expected_results_calculations[calc.pk] = expected_result if (validate_calculations(expected_results_calculations) and validate_workchains(expected_results_workchains)): print_daemon_log() From 326b258eb28fbd28cf651cffb939e148ab765099 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Mon, 13 Nov 2017 12:48:21 +0100 Subject: [PATCH 076/101] Add assert for 'cached_from' key in daemon test --- .travis-data/test_daemon.py | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis-data/test_daemon.py b/.travis-data/test_daemon.py index ead12a6375..5492a05cc3 100644 --- a/.travis-data/test_daemon.py +++ b/.travis-data/test_daemon.py @@ -145,6 +145,7 @@ def create_cache_calc(code, counter, inputval): code=code, counter=counter, inputval=inputval, use_cache=True ) print "[{}] created cached calculation.".format(counter) + assert 'cached_from' in calc.extras() return calc, 
expected_result def main(): From 2a6f475d0b0768d4053f7632139728179efa223c Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Sun, 10 Dec 2017 20:41:20 +0100 Subject: [PATCH 077/101] Test that the hash stays the same for completed calculations. --- .travis-data/test_daemon.py | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis-data/test_daemon.py b/.travis-data/test_daemon.py index 5492a05cc3..7e33e3ffe6 100644 --- a/.travis-data/test_daemon.py +++ b/.travis-data/test_daemon.py @@ -87,6 +87,7 @@ def validate_workchains(expected_results): try: calc = load_node(pk) actual_value = calc.out.output + assert calc.get_hash() == calc.get_extra('hash') except (NotExistent, AttributeError) as exception: print "* UNABLE TO RETRIEVE VALUE for workchain pk={}: I expected {}, I got {}: {}".format( pk, expected_value, type(exception), exception) From 8d7602643c4b0209a70d5b0f8a000dcf7fafef19 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Sun, 10 Dec 2017 20:44:13 +0100 Subject: [PATCH 078/101] Add 'retrieve_temporary_list' to ignored attributes for calculations. --- aiida/orm/implementation/general/calculation/job/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/aiida/orm/implementation/general/calculation/job/__init__.py b/aiida/orm/implementation/general/calculation/job/__init__.py index 80caec43c4..68eeb2814c 100644 --- a/aiida/orm/implementation/general/calculation/job/__init__.py +++ b/aiida/orm/implementation/general/calculation/job/__init__.py @@ -44,6 +44,7 @@ def _hash_ignored_attributes(cls): return super(AbstractJobCalculation, cls)._hash_ignored_attributes + [ 'queue_name', 'max_wallclock_seconds', + 'retrieve_temporary_list', ] def get_hash( @@ -766,7 +767,7 @@ def _set_retrieve_temporary_list(self, retrieve_temporary_list): 'strings or lists/tuples' ) - if (not (isinstance(item[0], basestring)) or + if (not (isinstance(item[0], basestring)) or not (isinstance(item[1], basestring)) or not (isinstance(item[2], int))): raise ValueError( From fb5f5dd883bfa1beb1aa39675b9f6ba0eb86729b Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Sun, 10 Dec 2017 21:07:00 +0100 Subject: [PATCH 079/101] Check for hash consistency only for cached calculations. --- .travis-data/test_daemon.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis-data/test_daemon.py b/.travis-data/test_daemon.py index 7e33e3ffe6..24e712710f 100644 --- a/.travis-data/test_daemon.py +++ b/.travis-data/test_daemon.py @@ -87,7 +87,6 @@ def validate_workchains(expected_results): try: calc = load_node(pk) actual_value = calc.out.output - assert calc.get_hash() == calc.get_extra('hash') except (NotExistent, AttributeError) as exception: print "* UNABLE TO RETRIEVE VALUE for workchain pk={}: I expected {}, I got {}: {}".format( pk, expected_value, type(exception), exception) @@ -147,6 +146,7 @@ def create_cache_calc(code, counter, inputval): ) print "[{}] created cached calculation.".format(counter) assert 'cached_from' in calc.extras() + assert calc.get_hash() == calc.get_extra('hash') return calc, expected_result def main(): From fe4a5c24abfddcad828a9f1d4252bee691c3d9c4 Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Tue, 12 Dec 2017 13:53:12 +0100 Subject: [PATCH 080/101] Add has_failed and has_finished methods to AbstractCalculation. 
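Together with the existing has_finished_ok(), this gives every calculation type a uniform state interface: has_finished() is true once the calculation stopped for any reason, and has_failed() distinguishes the two outcomes. A short usage sketch (``calc`` stands for any stored calculation node):

    if calc.has_finished():
        if calc.has_failed():
            print 'calculation failed'
        else:
            print 'calculation finished successfully'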
--- .../general/calculation/__init__.py | 19 ++++++++++++++++++- .../general/calculation/inline.py | 4 ++++ .../general/calculation/job/__init__.py | 10 ---------- docs/source/caching/index.rst | 4 ++-- 4 files changed, 24 insertions(+), 13 deletions(-) diff --git a/aiida/orm/implementation/general/calculation/__init__.py b/aiida/orm/implementation/general/calculation/__init__.py index 8104a1cec9..7d54e74b2c 100644 --- a/aiida/orm/implementation/general/calculation/__init__.py +++ b/aiida/orm/implementation/general/calculation/__init__.py @@ -342,8 +342,25 @@ def has_finished_ok(self): """ raise NotImplementedError + @abc.abstractmethod + def has_failed(self): + """ + Returns whether the Calculation has failed. + """ + raise NotImplementedError + + def has_finished(self): + """ + Determine if the calculation is finished for whatever reason. + This may be because it finished successfully or because of a failure. + + :return: True if the job has finished running, False otherwise. + :rtype: bool + """ + return self.has_finished_ok() or self.has_failed() + def _is_valid_cache(self): - return super(AbstractCalculation, self)._is_valid_cache() and self.has_finished_ok() + return super(AbstractCalculation, self)._is_valid_cache() and self.has_finished_ok() and self.is_sealed def _get_objects_to_hash(self): """ diff --git a/aiida/orm/implementation/general/calculation/inline.py b/aiida/orm/implementation/general/calculation/inline.py index 2a7dc0c181..67fe75ecad 100644 --- a/aiida/orm/implementation/general/calculation/inline.py +++ b/aiida/orm/implementation/general/calculation/inline.py @@ -42,6 +42,10 @@ def get_function_name(self): def has_finished_ok(self): return self.is_sealed + def has_failed(self): + # The InlineCalculation wrapper doesn't catch errors, which means the + # calculation is returned only if it is valid. + return False def make_inline(func): """ diff --git a/aiida/orm/implementation/general/calculation/job/__init__.py b/aiida/orm/implementation/general/calculation/job/__init__.py index 68eeb2814c..b2d3a6d5db 100644 --- a/aiida/orm/implementation/general/calculation/job/__init__.py +++ b/aiida/orm/implementation/general/calculation/job/__init__.py @@ -658,16 +658,6 @@ def _is_running(self): calc_states.PARSING ] - def has_finished(self): - """ - Determine if the calculation is finished for whatever reason. - This may be because it finished successfully or because of a failure. - - :return: True if the job has finished running, False otherwise. - :rtype: bool - """ - return self.has_finished_ok() or self.has_failed() - def has_finished_ok(self): """ Get whether the calculation is in the FINISHED status. diff --git a/docs/source/caching/index.rst b/docs/source/caching/index.rst index 01f13b33b0..d734332e7f 100644 --- a/docs/source/caching/index.rst +++ b/docs/source/caching/index.rst @@ -34,9 +34,9 @@ Caching is also implemented for Data nodes. This is not very useful in practice As you can see, passing ``use_cache=True`` to the ``store`` method enables using the cache. The fact that ``n2`` was created from ``n1`` is stored in the ``cached_from`` extra of ``n2``. -When running a ``JobCalculation`` through the ``Process`` interface, you cannot directly set the ``use_cache`` flag the calculation node is stored internally. Instead, you can pass the ``_use_cache`` flag to the ``run`` or ``submit`` method. +When running a ``JobCalculation`` through the ``Process`` interface, you cannot directly set the ``use_cache`` flag when the calculation node is stored internally. 
Instead, you can pass the ``_use_cache`` flag to the ``run`` or ``submit`` method.
-Caching is **not** implemented for workchains and workfunctions. Unlike calculations, they can not only create new data nodes, but only return existing ones. When copying a cached workchain, it's not clear which node should be returned without actually running the workchain.
+Caching is **not** implemented for workchains and workfunctions. Unlike calculations, they can not only create new data nodes, but also return existing ones. When copying a cached workchain, it's not clear which node should be returned without actually running the workchain.

 Configuration
 -------------

From b54d8405447db7ef268b9aaaf217c3a67b046b67 Mon Sep 17 00:00:00 2001
From: Dominik Gresch
Date: Tue, 12 Dec 2017 13:54:12 +0100
Subject: [PATCH 081/101] Remove install for git plumpy version in Travis.

--- .travis.yml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index a018829077..23f643d999 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -41,8 +41,6 @@ before_install:
 install:
     # Upgrade pip setuptools and wheel to be able to run the next command
     - pip install -U pip wheel setuptools
-    # Unreleased plum version required for fast-forwarding
-    - pip install git+https://github.com/greschd/plumpy@submit_with_process_type
     # Install AiiDA with some optional dependencies
     - pip install .[REST,docs,atomic_tools,testing,dev_precommit]

From dd3ca84e04811cb57d2e016eeb56363b50f7d05f Mon Sep 17 00:00:00 2001
From: Dominik Gresch
Date: Tue, 12 Dec 2017 14:07:12 +0100
Subject: [PATCH 082/101] Add computer uuid to _objects_to_be_hashed

--- aiida/orm/implementation/general/node.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/aiida/orm/implementation/general/node.py b/aiida/orm/implementation/general/node.py
index d62cac764b..811fa004a4 100644
--- a/aiida/orm/implementation/general/node.py
+++ b/aiida/orm/implementation/general/node.py
@@ -1672,6 +1672,7 @@ def _get_objects_to_hash(self):
         """
         Return a list of objects which should be included in the hash.
         """
+        computer = self.get_computer()
         return [
             importlib.import_module(
                 self.__module__.split('.', 1)[0]
@@ -1683,7 +1684,8 @@ def _get_objects_to_hash(self):
                 (key not in getattr(self, '_updatable_attributes', tuple()))
                 )
             },
-            self.folder
+            self.folder,
+            computer.uuid if computer is not None else None
         ]

     def rehash(self):

From 7e3a771efe52c7fad0ba68f5465ace5eb853a20d Mon Sep 17 00:00:00 2001
From: Dominik Gresch
Date: Tue, 12 Dec 2017 14:56:32 +0100
Subject: [PATCH 083/101] Remove 'self.is_sealed' in _is_valid_cache test -- is not fulfilled for all calculations.
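Calculation plugins that do need a stricter criterion can still override the hook themselves. A hypothetical sketch (the class and the extra output check are invented for illustration):

    from aiida.orm import JobCalculation

    class StrictCachingCalculation(JobCalculation):
        def _is_valid_cache(self):
            # additionally require that an 'output_parameters' node exists
            return (super(StrictCachingCalculation, self)._is_valid_cache()
                    and 'output_parameters' in self.get_outputs_dict())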
--- aiida/orm/implementation/general/calculation/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aiida/orm/implementation/general/calculation/__init__.py b/aiida/orm/implementation/general/calculation/__init__.py
index 7d54e74b2c..ddd95018fd 100644
--- a/aiida/orm/implementation/general/calculation/__init__.py
+++ b/aiida/orm/implementation/general/calculation/__init__.py
@@ -360,7 +360,7 @@ def has_finished(self):
         return self.has_finished_ok() or self.has_failed()

     def _is_valid_cache(self):
-        return super(AbstractCalculation, self)._is_valid_cache() and self.has_finished_ok() and self.is_sealed
+        return super(AbstractCalculation, self)._is_valid_cache() and self.has_finished_ok()

     def _get_objects_to_hash(self):
         """

From 2fb208143861cbda359be920994d4027c68d29e2 Mon Sep 17 00:00:00 2001
From: Dominik Gresch
Date: Tue, 12 Dec 2017 16:31:02 +0100
Subject: [PATCH 084/101] Add a more thorough discussion of links and caching

--- docs/source/caching/index.rst | 39 ++++++++++++++++++++++++++++++++++-
 1 file changed, 38 insertions(+), 1 deletion(-)

diff --git a/docs/source/caching/index.rst b/docs/source/caching/index.rst
index d734332e7f..eae1002daf 100644
--- a/docs/source/caching/index.rst
+++ b/docs/source/caching/index.rst
@@ -36,7 +36,7 @@ As you can see, passing ``use_cache=True`` to the ``store`` method enables using

 When running a ``JobCalculation`` through the ``Process`` interface, you cannot directly set the ``use_cache`` flag when the calculation node is stored internally. Instead, you can pass the ``_use_cache`` flag to the ``run`` or ``submit`` method.

-Caching is **not** implemented for workchains and workfunctions. Unlike calculations, they can not only create new data nodes, but also return existing ones. When copying a cached workchain, it's not clear which node should be returned without actually running the workchain.
+Caching is **not** implemented for workchains and workfunctions. Unlike calculations, they can not only create new data nodes, but also return existing ones. When copying a cached workchain, it's not clear which node should be returned without actually running the workchain. This is explained in more detail in the section :ref:`caching_provenance`.

 Configuration
 -------------
@@ -65,6 +65,7 @@ By default, this hash is created from:
 * all attributes of a node, except the ``_updatable_attributes``
 * the ``__version__`` of the module which defines the node class
 * the content of the repository folder of the node
+* the UUID of the computer, if the node has one

 In the case of calculations, the hashes of the inputs are also included. When developing calculation and data classes, there are some methods you can use to determine how the hash is created:

@@ -76,3 +77,39 @@ Additionally, there are two methods you can use to disable caching for particula

 * The :meth:`._is_valid_cache` method determines whether a particular node can be used as a cache. This is used for example to disable caching from failed calculations.
 * Node classes have a ``_cacheable`` attribute, which can be set to ``False`` to completely switch off caching for nodes of that class. This avoids performing queries for the hash altogether.
+
+There are two ways in which the hash match can go wrong: False negatives, where two nodes should have the same hash but do not, or false positives, where two different nodes have the same hash. It is important to understand that false negatives are **highly preferable**, because they only increase the runtime of your calculations, as if caching were disabled. False positives, however, can break the logic of your calculations. Be mindful of this when modifying the caching behaviour of your calculation and data classes.
+
+.. _caching_provenance:
+
+Caching and the Provenance Graph
+--------------------------------
+
+The goal of the caching mechanism is to speed up AiiDA calculations by re-using duplicate calculations. However, the resulting provenance graph should be exactly the same as if caching were disabled. This has important consequences for the kind of caching operations that are possible.
+
+The provenance graph consists of nodes describing data, calculations and workchains, and links describing the relationship between these nodes. We have seen that the hash of a node is used to determine whether two nodes are equivalent. To successfully use a cached node, however, we also need to know how the new node should be linked to its parents and children.
+
+In the case of a plain data node, this is simple: Copying a data node from an equivalent node should not change its links, so we just need to preserve the links which this new node already has.
+
+For calculations, the situation is a bit more complex: The node can have inputs and creates new data nodes as outputs. Again, the new node needs to keep its existing links. For the outputs, the calculation needs to create a copy of each node and link these as its outputs. This makes it look as if the calculation had produced these outputs itself, without caching.
+
+Finally, workchains can create links not only to nodes which they create themselves, but also to nodes created by a calculation that they called, or even their ancestors. This is where caching becomes impossible. Consider the following example (using workfunctions for simplicity):
+
+.. code:: python
+
+    from aiida.orm.data.base import Int
+    from aiida.work.workfunction import workfunction
+
+    @workfunction
+    def select(a, b):
+        return b
+
+    d = Int(1)
+    r1 = select(d, d)
+    r2 = select(Int(1), Int(1))
+
+The two ``select`` workfunctions have the same inputs as far as their hashes go. However, the first call uses the same input node twice, while the second one has two different inputs. If the second call should be cached from the first one, it is not clear which of the two input nodes should be returned.
+
+While this example might seem contrived, the conclusion is valid more generally: Because workchains can return nodes from their history, they cannot be cached. Since even two equivalent workchains (with the same inputs) can have a different history, there is no way to deduce which links should be created on the new workchain without actually running it.
+
+Overall, this limitation is acceptable: The runtime of AiiDA workchains is usually dominated by time spent inside expensive calculations. Since these can be avoided with the caching mechanism, it still improves the runtime and required computer resources a lot.
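+
+A hypothetical way to see the ambiguity in code, using the names from the example above (this is illustration only, not actual caching logic):
+
+.. code:: python
+
+    # The first call simply returned its own input node ...
+    assert r1.pk == d.pk
+    # ... while the second call received two distinct input nodes with equal
+    # hashes, so a cached copy could not decide which of them to return.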
From 87994f2dc3395e91771c68bff4846724b789d62f Mon Sep 17 00:00:00 2001
From: Dominik Gresch
Date: Wed, 13 Dec 2017 01:56:43 +0100
Subject: [PATCH 085/101] Validate caching of 'cached' calculations when also
 checking for expected results

--- .travis-data/test_daemon.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/.travis-data/test_daemon.py b/.travis-data/test_daemon.py
index 24e712710f..a06f850cce 100644
--- a/.travis-data/test_daemon.py
+++ b/.travis-data/test_daemon.py
@@ -98,6 +98,16 @@ def validate_workchains(expected_results):

     return valid

+def validate_cached(cached_calcs):
+    """
+    Check that the calculations created with caching are indeed cached.
+    """
+    return all(
+        'cached_from' in calc.extras() and
+        calc.get_hash() == calc.get_extra('hash')
+        for calc in cached_calcs
+    )
+
 def create_calculation(code, counter, inputval, use_cache=False):
     parameters = ParameterData(dict={'value': inputval})
     template = ParameterData(dict={
@@ -145,8 +155,6 @@ def create_cache_calc(code, counter, inputval):
         code=code, counter=counter, inputval=inputval, use_cache=True
     )
     print "[{}] created cached calculation.".format(counter)
-    assert 'cached_from' in calc.extras()
-    assert calc.get_hash() == calc.get_extra('hash')
     return calc, expected_result

 def main():
@@ -227,14 +235,17 @@ def main():
             timeout_secs)
         sys.exit(2)
     else:
         # create cached calculations -- these should be FINISHED immediately
+        cached_calcs = []
         for counter in range(1, number_calculations + 1):
             inputval = counter
             calc, expected_result = create_cache_calc(
                 code=code, counter=counter, inputval=inputval
             )
+            cached_calcs.append(calc)
             expected_results_calculations[calc.pk] = expected_result
         if (validate_calculations(expected_results_calculations)
-                and validate_workchains(expected_results_workchains)):
+                and validate_workchains(expected_results_workchains)
+                and validate_cached(cached_calcs)):
             print_daemon_log()
             print ""
             print "OK, all calculations have the expected parsed result"

From be36c3adef181b4fcf70cd67a088bbfffc778eaa Mon Sep 17 00:00:00 2001
From: Dominik Gresch
Date: Mon, 29 Jan 2018 15:56:29 +0100
Subject: [PATCH 086/101] * add 'get_all_same_nodes' function, returns a list
 of all matching nodes * rename 'get_same_node' to '_get_same_node' * add
 documentation for both

--- aiida/orm/implementation/general/node.py | 30 ++++++++++++++++++------
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/aiida/orm/implementation/general/node.py b/aiida/orm/implementation/general/node.py
index 04514acb74..038db011f2 100644
--- a/aiida/orm/implementation/general/node.py
+++ b/aiida/orm/implementation/general/node.py
@@ -1573,7 +1573,7 @@ def store(self, with_transaction=True, use_cache=None):
         if use_cache is None:
             use_cache = get_use_cache(type(self))
         # Retrieve the cached node.
-        same_node = self.get_same_node() if use_cache else None
+        same_node = self._get_same_node() if use_cache else None
         if same_node is not None:
             self._store_from_cache(same_node, with_transaction=with_transaction)
             self._add_outputs_from_cache(same_node)
@@ -1690,9 +1690,27 @@ def _get_objects_to_hash(self):
     def rehash(self):
         self.set_extra('hash', self.get_hash())

-    def get_same_node(self):
+    def _get_same_node(self):
+        """
+        Returns a stored node from which the current Node can be cached, meaning that the returned Node is a valid cache, and its ``_aiida_hash`` attribute matches ``self.get_hash()``.
+
+        If there are multiple valid matches, the first one is returned. If no matches are found, ``None`` is returned.
+ + Note that after ``self`` is stored, this function can return ``self``. + """ + same_nodes = self.get_all_same_nodes() + if same_nodes: + return same_nodes[0] + return None + + def get_all_same_nodes(self): + """ + Return a list of stored nodes which match the type and hash of the current node. For the stored nodes, the ``_aiida_hash`` extra is checked to determine the hash, while ``self.get_hash()`` is executed on the current node. + + Only nodes which are a valid cache are returned. If the current node is already stored, it can be included in the returned list if ``self.get_hash()`` matches its ``_aiida_hash``. + """ if not self._cacheable: - return None + return [] from aiida.orm.querybuilder import QueryBuilder @@ -1700,10 +1718,8 @@ def get_same_node(self): if hash_: qb = QueryBuilder() qb.append(self.__class__, filters={'extras.hash': hash_}, project='*', subclassing=False) - for same_node, in qb.iterall(): - if same_node._is_valid_cache(): - return same_node - return None + same_nodes = [n[0] for n in qb.all()] + return [n for n in same_nodes if n._is_valid_cache()] def _is_valid_cache(self): """ From be36c3adef181b4fcf70cd67a088bbfffc778eaa Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Mon, 29 Jan 2018 16:42:44 +0100 Subject: [PATCH 087/101] Add '_iter_all_same_nodes' helper function, which returns an iterator over all same nodes. The reason for this is to avoid using '.all()' in the query when only the first matching element is needed. --- aiida/orm/implementation/general/node.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/aiida/orm/implementation/general/node.py b/aiida/orm/implementation/general/node.py index 038db011f2..9d5fed6d21 100644 --- a/aiida/orm/implementation/general/node.py +++ b/aiida/orm/implementation/general/node.py @@ -1698,10 +1698,10 @@ def _get_same_node(self): Note that after ``self`` is stored, this function can return ``self``. """ - same_nodes = self.get_all_same_nodes() - if same_nodes: - return same_nodes[0] - return None + try: + return next(self._iter_all_same_nodes()) + except StopIteration: + return None def get_all_same_nodes(self): """ @@ -1709,8 +1709,14 @@ def get_all_same_nodes(self): Only nodes which are a valid cache are returned. If the current node is already stored, it can be included in the returned list if ``self.get_hash()`` matches its ``_aiida_hash``. """ + return list(self._iter_all_same_nodes()) + + def _iter_all_same_nodes(self): + """ + Returns an iterator of all same nodes. + """ if not self._cacheable: - return [] + return iter(()) from aiida.orm.querybuilder import QueryBuilder @@ -1718,8 +1724,8 @@ def get_all_same_nodes(self): if hash_: qb = QueryBuilder() qb.append(self.__class__, filters={'extras.hash': hash_}, project='*', subclassing=False) - same_nodes = [n[0] for n in qb.all()] - return [n for n in same_nodes if n._is_valid_cache()] + same_nodes = (n[0] for n in qb.iterall()) + return (n for n in same_nodes if n._is_valid_cache()) def _is_valid_cache(self): """ From c413145df55cd47e468cb6cb480ceac1dc0a147a Mon Sep 17 00:00:00 2001 From: Dominik Gresch Date: Mon, 29 Jan 2018 16:54:27 +0100 Subject: [PATCH 088/101] Rename 'cached_from' and 'hash' extras by prepending '_aiida_'. 
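User code that inspects these extras must use the prefixed names from now on. A minimal sketch (``calc`` is assumed to be a calculation that was stored with ``use_cache=True`` and found a cache match):

    original_uuid = calc.get_extra('_aiida_cached_from')
    assert calc.get_hash() == calc.get_extra('_aiida_hash')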
---
 .travis-data/test_daemon.py                 |  4 ++--
 aiida/backends/tests/inline_calculation.py  |  4 ++--
 aiida/backends/tests/nodes.py               | 10 +++++-----
 aiida/backends/tests/work/workfunction.py   |  2 +-
 aiida/orm/implementation/django/node.py     |  2 +-
 aiida/orm/implementation/general/node.py    |  5 +----
 aiida/orm/implementation/sqlalchemy/node.py |  2 +-
 docs/source/caching/index.rst               |  6 +++---
 8 files changed, 16 insertions(+), 19 deletions(-)

diff --git a/.travis-data/test_daemon.py b/.travis-data/test_daemon.py
index a06f850cce..302939f0ec 100644
--- a/.travis-data/test_daemon.py
+++ b/.travis-data/test_daemon.py
@@ -103,8 +103,8 @@ def validate_cached(cached_calcs):
     Check that the calculations created with caching are indeed cached.
     """
     return all(
-        'cached_from' in calc.extras() and
-        calc.get_hash() == calc.get_extra('hash')
+        '_aiida_cached_from' in calc.extras() and
+        calc.get_hash() == calc.get_extra('_aiida_hash')
         for calc in cached_calcs
     )
 
diff --git a/aiida/backends/tests/inline_calculation.py b/aiida/backends/tests/inline_calculation.py
index a62beb916c..e0232e32e6 100644
--- a/aiida/backends/tests/inline_calculation.py
+++ b/aiida/backends/tests/inline_calculation.py
@@ -40,7 +40,7 @@ def test_caching(self):
             calc1, res1 = self.incr_inline(inp=Int(11))
             calc2, res2 = self.incr_inline(inp=Int(11))
         self.assertEquals(res1['res'].value, res2['res'].value, 12)
-        self.assertEquals(calc1.get_extra('cached_from', calc1.uuid), calc2.get_extra('cached_from'))
+        self.assertEquals(calc1.get_extra('_aiida_cached_from', calc1.uuid), calc2.get_extra('_aiida_cached_from'))
 
     def test_caching_change_code(self):
         with enable_caching(InlineCalculation):
@@ -52,4 +52,4 @@ def incr_inline(inp):
             calc2, res2 = incr_inline(inp=Int(11))
 
         self.assertNotEquals(res1['res'].value, res2['res'].value)
-        self.assertFalse('cached_from' in calc2.extras())
+        self.assertFalse('_aiida_cached_from' in calc2.extras())
 
diff --git a/aiida/backends/tests/nodes.py b/aiida/backends/tests/nodes.py
index 52307ac087..43f243013b 100644
--- a/aiida/backends/tests/nodes.py
+++ b/aiida/backends/tests/nodes.py
@@ -47,7 +47,7 @@ def test_simple_equal_nodes(self):
             n2 = self.create_simple_node(*attr)
             n1.store(use_cache=True)
             n2.store(use_cache=True)
-            self.assertEqual(n1.uuid, n2.get_extra('cached_from'))
+            self.assertEqual(n1.uuid, n2.get_extra('_aiida_cached_from'))
 
     @staticmethod
     def create_folderdata_with_empty_file():
@@ -79,14 +79,14 @@ def test_folder_same(self):
         f2 = self.create_folderdata_with_empty_folder()
         f1.store()
         f2.store(use_cache=True)
-        assert f1.uuid == f2.get_extra('cached_from')
+        assert f1.uuid == f2.get_extra('_aiida_cached_from')
 
     def test_file_same(self):
         f1 = self.create_folderdata_with_empty_file()
         f2 = self.create_folderdata_with_empty_file()
         f1.store()
         f2.store(use_cache=True)
-        assert f1.uuid == f2.get_extra('cached_from')
+        assert f1.uuid == f2.get_extra('_aiida_cached_from')
 
     def test_simple_unequal_nodes(self):
         attributes = [
@@ -99,7 +99,7 @@ def test_simple_unequal_nodes(self):
             n1.store()
             n2.store(use_cache=True)
             self.assertNotEquals(n1.uuid, n2.uuid)
-            self.assertFalse('cached_from' in n2.extras())
+            self.assertFalse('_aiida_cached_from' in n2.extras())
 
     def test_unequal_arrays(self):
         import numpy as np
@@ -119,7 +119,7 @@ def create_arraydata(arr):
         a2 = create_arraydata(arr2)
         a2.store(use_cache=True)
         self.assertNotEquals(a1.uuid, a2.uuid)
-        self.assertFalse('cached_from' in a2.extras())
+        self.assertFalse('_aiida_cached_from' in a2.extras())
 
     def test_updatable_attributes(self):
         """
diff --git a/aiida/backends/tests/work/workfunction.py b/aiida/backends/tests/work/workfunction.py
index 88f5d42d37..c2597b72ab 100644
--- a/aiida/backends/tests/work/workfunction.py
+++ b/aiida/backends/tests/work/workfunction.py
@@ -62,4 +62,4 @@ def test_hashes_different(self):
 
     def _check_hash_consistent(self, pid):
         wc = load_node(pid)
-        self.assertEqual(wc.get_hash(), wc.get_extra('hash'))
+        self.assertEqual(wc.get_hash(), wc.get_extra('_aiida_hash'))
 
diff --git a/aiida/orm/implementation/django/node.py b/aiida/orm/implementation/django/node.py
index acb1028593..d2e204f39a 100644
--- a/aiida/orm/implementation/django/node.py
+++ b/aiida/orm/implementation/django/node.py
@@ -619,6 +619,6 @@ def _db_store(self, with_transaction=True):
         from aiida.backends.djsite.db.models import DbExtra
         # I store the hash without cleaning and without incrementing the nodeversion number
-        DbExtra.set_value_for_node(self.dbnode, 'hash', self.get_hash())
+        DbExtra.set_value_for_node(self.dbnode, '_aiida_hash', self.get_hash())
         return self
 
diff --git a/aiida/orm/implementation/general/node.py b/aiida/orm/implementation/general/node.py
index 9d5fed6d21..5ef5eb14d8 100644
--- a/aiida/orm/implementation/general/node.py
+++ b/aiida/orm/implementation/general/node.py
@@ -1141,9 +1141,6 @@ def iterextras(self):
             return
             yield  # Needed after return to convert it to a generator
         for extra in self._db_iterextras():
-            # Don't return if key == hash
-            if extra[0] == 'hash':
-                continue
             yield extra
 
     def iterattrs(self):
@@ -1614,7 +1611,7 @@
             raise ValueError("Cannot use cache from nodes with RETURN links.")
 
         self.store(with_transaction=with_transaction, use_cache=False)
-        self.set_extra('cached_from', cache_node.uuid)
+        self.set_extra('_aiida_cached_from', cache_node.uuid)
 
     def _add_outputs_from_cache(self, cache_node):
         # add CREATE links
diff --git a/aiida/orm/implementation/sqlalchemy/node.py b/aiida/orm/implementation/sqlalchemy/node.py
index 9a67319482..0eb96a3022 100644
--- a/aiida/orm/implementation/sqlalchemy/node.py
+++ b/aiida/orm/implementation/sqlalchemy/node.py
@@ -658,7 +658,7 @@ def _db_store(self, with_transaction=True):
                     self._repository_folder.abspath, move=True, overwrite=True)
                 raise
 
-        self.dbnode.set_extra('hash', self.get_hash())
+        self.dbnode.set_extra('_aiida_hash', self.get_hash())
 
         return self
 
diff --git a/docs/source/caching/index.rst b/docs/source/caching/index.rst
index eae1002daf..f0dec1afcb 100644
--- a/docs/source/caching/index.rst
+++ b/docs/source/caching/index.rst
@@ -29,10 +29,10 @@ Caching is also implemented for Data nodes. This is not very useful in practice
     In [7]: print('UUID of n1:', n1.uuid)
     UUID of n1: 956109e1-4382-4240-a711-2a4f3b522122
 
-    In [8]: print('n2 is cached from:', n2.get_extra('cached_from'))
+    In [8]: print('n2 is cached from:', n2.get_extra('_aiida_cached_from'))
     n2 is cached from: 956109e1-4382-4240-a711-2a4f3b522122
 
-As you can see, passing ``use_cache=True`` to the ``store`` method enables using the cache. The fact that ``n2`` was created from ``n1`` is stored in the ``cached_from`` extra of ``n2``.
+As you can see, passing ``use_cache=True`` to the ``store`` method enables using the cache. The fact that ``n2`` was created from ``n1`` is stored in the ``_aiida_cached_from`` extra of ``n2``.
 
 When running a ``JobCalculation`` through the ``Process`` interface, you cannot directly set the ``use_cache`` flag when the calculation node is stored internally.
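 A minimal sketch of the alternative (hypothetical process class and inputs; only the ``_use_cache`` keyword itself comes from this changeset):
 
 .. code:: python
 
     from aiida.work.run import run
 
     # caching is requested through the process interface, not through store()
     results = run(SomeCalculation.process(), _use_cache=True, **inputs)
 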
 Instead, you can pass the ``_use_cache`` flag to the ``run`` or ``submit`` method, as the sketch above shows.
@@ -58,7 +58,7 @@ This means that caching is enabled for ``TemplatereplacerCalculation`` and ``Str
 How are cached nodes matched?
 -----------------------------
 
-To determine wheter a given node is identical to an existing one, a hash of the content of the node is created. If a node of the same class with the same hash already exists in the database, this is considered a cache match. You can manually check the hash of a given node with the :meth:`.get_hash() <.AbstractNode.get_hash>` method.
+To determine whether a given node is identical to an existing one, a hash of the content of the node is created. If a node of the same class with the same hash already exists in the database, this is considered a cache match. You can manually check the hash of a given node with the :meth:`.get_hash() <.AbstractNode.get_hash>` method. Once a node is stored in the database, its hash is stored in the ``_aiida_hash`` extra, and this is used to find matching nodes.
 
 By default, this hash is created from:

From acde1c3a00221ff6af8b08ba8a1bc4d053588fca Mon Sep 17 00:00:00 2001
From: Dominik Gresch
Date: Mon, 29 Jan 2018 17:01:25 +0100
Subject: [PATCH 089/101] Define constant for '_aiida_hash' key

---
 aiida/orm/implementation/django/node.py     | 4 ++--
 aiida/orm/implementation/general/node.py    | 4 ++--
 aiida/orm/implementation/sqlalchemy/node.py | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/aiida/orm/implementation/django/node.py b/aiida/orm/implementation/django/node.py
index d2e204f39a..1113a1afe7 100644
--- a/aiida/orm/implementation/django/node.py
+++ b/aiida/orm/implementation/django/node.py
@@ -21,7 +21,7 @@
 from aiida.common.folders import RepositoryFolder
 from aiida.common.links import LinkType
 from aiida.common.utils import get_new_uuid
-from aiida.orm.implementation.general.node import AbstractNode, _NO_DEFAULT
+from aiida.orm.implementation.general.node import AbstractNode, _NO_DEFAULT, _HASH_EXTRA_KEY
 from aiida.orm.mixins import Sealable
 # from aiida.orm.implementation.django.utils import get_db_columns
 from aiida.orm.implementation.general.utils import get_db_columns
@@ -619,6 +619,6 @@ def _db_store(self, with_transaction=True):
         from aiida.backends.djsite.db.models import DbExtra
         # I store the hash without cleaning and without incrementing the nodeversion number
-        DbExtra.set_value_for_node(self.dbnode, '_aiida_hash', self.get_hash())
+        DbExtra.set_value_for_node(self.dbnode, _HASH_EXTRA_KEY, self.get_hash())
         return self
 
diff --git a/aiida/orm/implementation/general/node.py b/aiida/orm/implementation/general/node.py
index 5ef5eb14d8..d1bdb86a0c 100644
--- a/aiida/orm/implementation/general/node.py
+++ b/aiida/orm/implementation/general/node.py
@@ -31,7 +31,7 @@
 from aiida.backends.utils import validate_attribute_key
 
 _NO_DEFAULT = tuple()
-
+_HASH_EXTRA_KEY = '_aiida_hash'
 
 def clean_value(value):
     """
@@ -1685,7 +1685,7 @@ def _get_objects_to_hash(self):
         ]
 
     def rehash(self):
-        self.set_extra('hash', self.get_hash())
+        self.set_extra(_HASH_EXTRA_KEY, self.get_hash())
 
     def _get_same_node(self):
         """
diff --git a/aiida/orm/implementation/sqlalchemy/node.py b/aiida/orm/implementation/sqlalchemy/node.py
index 0eb96a3022..897bea6ccc 100644
--- a/aiida/orm/implementation/sqlalchemy/node.py
+++ b/aiida/orm/implementation/sqlalchemy/node.py
@@ -28,7 +28,7 @@
                                      NotExistent, UniquenessError)
 from aiida.common.links import LinkType
 
-from aiida.orm.implementation.general.node import AbstractNode, _NO_DEFAULT
+from aiida.orm.implementation.general.node import AbstractNode, _NO_DEFAULT, _HASH_EXTRA_KEY
 from aiida.orm.implementation.sqlalchemy.computer import Computer
 from aiida.orm.implementation.sqlalchemy.group import Group
 from aiida.orm.implementation.sqlalchemy.utils import django_filter, \
@@ -658,7 +658,7 @@ def _db_store(self, with_transaction=True):
                     self._repository_folder.abspath, move=True, overwrite=True)
                 raise
 
-        self.dbnode.set_extra('_aiida_hash', self.get_hash())
+        self.dbnode.set_extra(_HASH_EXTRA_KEY, self.get_hash())
 
         return self
 

From 4101b15637f6b42a6015c5f8e53a4941d57a7dd4 Mon Sep 17 00:00:00 2001
From: Dominik Gresch
Date: Mon, 29 Jan 2018 17:16:11 +0100
Subject: [PATCH 090/101] Fix filter in '_iter_all_same_nodes': Rename hash to
 _aiida_hash

---
 aiida/orm/implementation/general/node.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aiida/orm/implementation/general/node.py b/aiida/orm/implementation/general/node.py
index d1bdb86a0c..649212a2f3 100644
--- a/aiida/orm/implementation/general/node.py
+++ b/aiida/orm/implementation/general/node.py
@@ -1720,7 +1720,7 @@ def _iter_all_same_nodes(self):
         hash_ = self.get_hash()
         if hash_:
             qb = QueryBuilder()
-            qb.append(self.__class__, filters={'extras.hash': hash_}, project='*', subclassing=False)
+            qb.append(self.__class__, filters={'extras._aiida_hash': hash_}, project='*', subclassing=False)

From 107e4443d89c88e30810fc47f4b7042cfd633b1e Mon Sep 17 00:00:00 2001
From: Dominik Gresch
Date: Mon, 29 Jan 2018 17:48:26 +0100
Subject: [PATCH 091/101] Fix tests which compared extras and didn't expect
 the '_aiida_hash' extra.

---
 aiida/backends/djsite/db/subtests/generic.py |  4 +--
 aiida/backends/sqlalchemy/tests/generic.py   |  4 +--
 aiida/backends/tests/nodes.py                | 35 ++++++++++++++------
 3 files changed, 28 insertions(+), 15 deletions(-)

diff --git a/aiida/backends/djsite/db/subtests/generic.py b/aiida/backends/djsite/db/subtests/generic.py
index 18fcebe550..154b01fabe 100644
--- a/aiida/backends/djsite/db/subtests/generic.py
+++ b/aiida/backends/djsite/db/subtests/generic.py
@@ -137,7 +137,7 @@ def test_replacement_1(self):
                                             'pippobis': [5, 6, 'c']})
         self.assertEquals(n2.get_extras(), {'pippo2': [3, 4, 'b']})
 
-        new_attrs = {"newval1": "v", "newval2": [1, {"c": "d", "e": 2}]}
+        new_attrs = {"newval1": "v", "newval2": [1, {"c": "d", "e": 2}], '_aiida_hash': n1.get_hash()}
         DbExtra.reset_values_for_node(n1.dbnode, attributes=new_attrs)
         self.assertEquals(n1.get_extras(), new_attrs)
@@ -147,4 +147,4 @@
         del new_attrs['newval2']
         self.assertEquals(n1.get_extras(), new_attrs)
         # Also check that other nodes were not damaged
-        self.assertEquals(n2.get_extras(), {'pippo2': [3, 4, 'b']})
+        self.assertEquals(n2.get_extras(), {'pippo2': [3, 4, 'b'], '_aiida_hash': n2.get_hash()})
 
diff --git a/aiida/backends/tests/nodes.py b/aiida/backends/tests/nodes.py
index 43f243013b..63b822871c 100644
--- a/aiida/backends/tests/nodes.py
+++ b/aiida/backends/tests/nodes.py
@@ -589,6 +589,7 @@ def test_attributes_on_copy(self):
             'bool': 'some non-boolean value',
             'some_other_name': 987
         }
+        all_extras = dict(_aiida_hash=AnyValue(), **extras_to_set)
 
         for k, v in extras_to_set.iteritems():
             a.set_extra(k, v)
@@ -613,12 +614,12 @@ def test_attributes_on_copy(self):
         b.store()
         # and I finally add a extras
         b.set_extra('meta', 'textofext')
-        b_expected_extras = {'meta': 'textofext'}
+        b_expected_extras = {'meta': 'textofext', '_aiida_hash': AnyValue()}
 
         # Now I check for the attributes
         # First I check that nothing has changed
         self.assertEquals({k: v for k, v in a.iterattrs()}, attrs_to_set)
-        self.assertEquals({k: v for k, v in a.iterextras()}, extras_to_set)
+        self.assertEquals({k: v for k, v in a.iterextras()}, all_extras)
 
         # I check then on the 'b' copy
         self.assertEquals({k: v
@@ -943,7 +944,7 @@ def test_attr_and_extras(self):
 
         self.assertEquals(self.boolval, a.get_attr('bool'))
         self.assertEquals(a_string, a.get_extra('bool'))
-        self.assertEquals(a.get_extras(), {'bool': a_string})
+        self.assertEquals(a.get_extras(), {'bool': a_string, '_aiida_hash': AnyValue()})
 
     def test_get_extras_with_default(self):
         a = Node()
@@ -1021,14 +1022,16 @@ def test_attr_listing(self):
         for k, v in extras_to_set.iteritems():
             a.set_extra(k, v)
 
+        all_extras = dict(_aiida_hash=AnyValue(), **extras_to_set)
+
         self.assertEquals(set(a.attrs()), set(attrs_to_set.keys()))
-        self.assertEquals(set(a.extras()), set(extras_to_set.keys()))
+        self.assertEquals(set(a.extras()), set(all_extras.keys()))
 
         returned_internal_attrs = {k: v for k, v in a.iterattrs()}
         self.assertEquals(returned_internal_attrs, attrs_to_set)
 
         returned_attrs = {k: v for k, v in a.iterextras()}
-        self.assertEquals(returned_attrs, extras_to_set)
+        self.assertEquals(returned_attrs, all_extras)
 
     def test_versioning_and_postsave_attributes(self):
         """
@@ -1096,10 +1099,12 @@ def test_delete_extras(self):
             'further': 267,
         }
 
+        all_extras = dict(_aiida_hash=AnyValue(), **extras_to_set)
+
         for k, v in extras_to_set.iteritems():
             a.set_extra(k, v)
 
-        self.assertEquals({k: v for k, v in a.iterextras()}, extras_to_set)
+        self.assertEquals({k: v for k, v in a.iterextras()}, all_extras)
 
         # I pregenerate it, it cannot change during iteration
         list_keys = list(extras_to_set.keys())
         for k in list_keys:
            # I delete one by one the keys and check if the operation is
            # performed correctly
            a.del_extra(k)
-           del extras_to_set[k]
-           self.assertEquals({k: v for k, v in a.iterextras()}, extras_to_set)
+           del all_extras[k]
+           self.assertEquals({k: v for k, v in a.iterextras()}, all_extras)
 
     def test_replace_extras_1(self):
         """
@@ -1132,6 +1137,7 @@ def test_replace_extras_1(self):
                 'h': 'j'
             }, [9, 8, 7]],
         }
+        all_extras = dict(_aiida_hash=AnyValue(), **extras_to_set)
 
         # I redefine the keys with more complicated data, and
         # changing the data type too
@@ -1150,7 +1156,7 @@ def test_replace_extras_1(self):
         for k, v in extras_to_set.iteritems():
             a.set_extra(k, v)
 
-        self.assertEquals({k: v for k, v in a.iterextras()}, extras_to_set)
+        self.assertEquals({k: v for k, v in a.iterextras()}, all_extras)
 
         for k, v in new_extras.iteritems():
             # I delete one by one the keys and check if the operation is
@@ -1159,8 +1165,8 @@
 
         # I update extras_to_set with the new entries, and do the comparison
         # again
-        extras_to_set.update(new_extras)
-        self.assertEquals({k: v for k, v in a.iterextras()}, extras_to_set)
+        all_extras.update(new_extras)
+        self.assertEquals({k: v for k, v in a.iterextras()}, all_extras)
 
     def test_basetype_as_attr(self):
         """
@@ -2068,3 +2074,10 @@ def test_check_single_calc_source(self):
         # more than one input to the same data object!
         with self.assertRaises(ValueError):
             d1.add_link_from(calc2, link_type=LinkType.CREATE)
+
+class AnyValue(object):
+    """
+    Helper class that compares equal to everything.
+    """
+    def __eq__(self, other):
+        return True

From 1f5bf075720cd5f235a766f01402930f022fd810 Mon Sep 17 00:00:00 2001
From: Dominik Gresch
Date: Mon, 29 Jan 2018 18:59:31 +0100
Subject: [PATCH 092/101] Fix TestDbExtrasDjango to take into account
 '_aiida_hash'.

---
 aiida/backends/djsite/db/subtests/generic.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/aiida/backends/djsite/db/subtests/generic.py b/aiida/backends/djsite/db/subtests/generic.py
index 154b01fabe..bdc5c60614 100644
--- a/aiida/backends/djsite/db/subtests/generic.py
+++ b/aiida/backends/djsite/db/subtests/generic.py
@@ -134,14 +134,18 @@ def test_replacement_1(self):
         DbExtra.set_value_for_node(n2.dbnode, "pippo2", [3, 4, 'b'])
 
         self.assertEquals(n1.get_extras(), {'pippo': [1, 2, 'a'],
-                                            'pippobis': [5, 6, 'c']})
-        self.assertEquals(n2.get_extras(), {'pippo2': [3, 4, 'b']})
+                                            'pippobis': [5, 6, 'c'],
+                                            '_aiida_hash': n1.get_hash()
+                                            })
+        self.assertEquals(n2.get_extras(), {'pippo2': [3, 4, 'b'],
+                                            '_aiida_hash': n2.get_hash()
+                                            })
 
-        new_attrs = {"newval1": "v", "newval2": [1, {"c": "d", "e": 2}], '_aiida_hash': n1.get_hash()}
+        new_attrs = {"newval1": "v", "newval2": [1, {"c": "d", "e": 2}]}
         DbExtra.reset_values_for_node(n1.dbnode, attributes=new_attrs)
         self.assertEquals(n1.get_extras(), new_attrs)
-        self.assertEquals(n2.get_extras(), {'pippo2': [3, 4, 'b']})
+        self.assertEquals(n2.get_extras(), {'pippo2': [3, 4, 'b'], '_aiida_hash': n2.get_hash()})
 
         DbExtra.del_value_for_node(n1.dbnode, key='newval2')
         del new_attrs['newval2']

From 89c362662854016be6422d1d603451fdc15b9689 Mon Sep 17 00:00:00 2001
From: Dominik Gresch
Date: Mon, 29 Jan 2018 23:18:26 +0100
Subject: [PATCH 093/101] Fix SQLA test_replacement_1 test to work with
 '_aiida_hash'

---
 aiida/backends/sqlalchemy/tests/generic.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/aiida/backends/sqlalchemy/tests/generic.py b/aiida/backends/sqlalchemy/tests/generic.py
index 3d0096c57f..5ab37243b0 100644
--- a/aiida/backends/sqlalchemy/tests/generic.py
+++ b/aiida/backends/sqlalchemy/tests/generic.py
@@ -141,15 +141,15 @@ def test_replacement_1(self):
 
         n2.set_extra("pippo2", [3, 4, u'b'])
 
-        self.assertEqual(n1.get_extras(),{'pippo': [1, 2, u'a'], 'pippobis': [5, 6, u'c']})
+        self.assertEqual(n1.get_extras(),{'pippo': [1, 2, u'a'], 'pippobis': [5, 6, u'c'], '_aiida_hash': n1.get_hash()})
 
-        self.assertEquals(n2.get_extras(), {'pippo2': [3, 4, 'b']})
+        self.assertEquals(n2.get_extras(), {'pippo2': [3, 4, 'b'], '_aiida_hash': n2.get_hash()})
 
-        new_attrs = {"newval1": "v", "newval2": [1, {"c": "d", "e": 2}], "_aiida_hash": n1.get_hash()}
+        new_attrs = {"newval1": "v", "newval2": [1, {"c": "d", "e": 2}]}
 
         n1.reset_extras(new_attrs)
         self.assertEquals(n1.get_extras(), new_attrs)
-        self.assertEquals(n2.get_extras(), {'pippo2': [3, 4, 'b']})
+        self.assertEquals(n2.get_extras(), {'pippo2': [3, 4, 'b'], '_aiida_hash': n2.get_hash()})
 
         n1.del_extra('newval2')
         del new_attrs['newval2']

From 7b4d164dfb9086bd5a11aeb3dc96a5ee0c969d5d Mon Sep 17 00:00:00 2001
From: Dominik Gresch
Date: Wed, 31 Jan 2018 11:24:39 +0100
Subject: [PATCH 094/101] Fix error message where 'fast-forwarding' is
 mentioned.

---
 aiida/common/caching.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aiida/common/caching.py b/aiida/common/caching.py
index b0cec16e60..383c766b6f 100644
--- a/aiida/common/caching.py
+++ b/aiida/common/caching.py
@@ -87,7 +87,7 @@ def get_use_cache(node_class=None):
         enabled = node_class in _CONFIG[config_keys.enabled]
         disabled = node_class in _CONFIG[config_keys.disabled]
         if enabled and disabled:
-            raise ValueError('Invalid configuration: Fast-forwarding for {} is both enabled and disabled.'.format(node_class))
+            raise ValueError('Invalid configuration: Caching for {} is both enabled and disabled.'.format(node_class))
         elif enabled:
             return True
         elif disabled:

From e515f149853de2256b36ea3492a80cd6fc78f3f2 Mon Sep 17 00:00:00 2001
From: Dominik Gresch
Date: Wed, 31 Jan 2018 11:53:17 +0100
Subject: [PATCH 095/101] Remove 'retrieve_temporary_list' from ignored hash
 attributes.

---
 aiida/orm/implementation/general/calculation/job/__init__.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/aiida/orm/implementation/general/calculation/job/__init__.py b/aiida/orm/implementation/general/calculation/job/__init__.py
index bec7e43fee..3f39f93b4a 100644
--- a/aiida/orm/implementation/general/calculation/job/__init__.py
+++ b/aiida/orm/implementation/general/calculation/job/__init__.py
@@ -44,7 +44,6 @@ def _hash_ignored_attributes(cls):
         return super(AbstractJobCalculation, cls)._hash_ignored_attributes + [
             'queue_name',
             'max_wallclock_seconds',
-            'retrieve_temporary_list',
         ]
 
     def get_hash(

From 99bad1e3701c5ccf0fc8bbed8da4116dd75de14a Mon Sep 17 00:00:00 2001
From: Dominik Gresch
Date: Wed, 31 Jan 2018 13:24:31 +0100
Subject: [PATCH 096/101] Add doc for what to do when caching is triggered in
 error, add clear_hash function

---
 aiida/orm/implementation/general/node.py |  9 +++++++++
 docs/source/caching/index.rst            | 22 ++++++++++++++++++++++
 2 files changed, 31 insertions(+)

diff --git a/aiida/orm/implementation/general/node.py b/aiida/orm/implementation/general/node.py
index 649212a2f3..ed0a871d5e 100644
--- a/aiida/orm/implementation/general/node.py
+++ b/aiida/orm/implementation/general/node.py
@@ -1685,8 +1685,17 @@ def _get_objects_to_hash(self):
         ]
 
     def rehash(self):
+        """
+        Re-generates the stored hash of the Node.
+        """
         self.set_extra(_HASH_EXTRA_KEY, self.get_hash())
 
+    def clear_hash(self):
+        """
+        Sets the stored hash of the Node to None.
+        """
+        self.set_extra(_HASH_EXTRA_KEY, None)
+
     def _get_same_node(self):
         """
diff --git a/docs/source/caching/index.rst b/docs/source/caching/index.rst
index f0dec1afcb..40f4c97718 100644
--- a/docs/source/caching/index.rst
+++ b/docs/source/caching/index.rst
@@ -80,6 +80,28 @@ Additionally, there are two methods you can use to disable caching for particula
 
 There are two ways in which the hash match can go wrong: False negatives, where two nodes should have the same hash but do not, or false positives, where two different nodes have the same hash.
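 For instance, a minimal sketch of guarding against one source of false negatives (a hypothetical ``Data`` subclass; the ``classproperty`` import is an assumption, mirroring the ``AbstractJobCalculation`` pattern in this series):
 
 .. code:: python
 
     from aiida.common.utils import classproperty
     from aiida.orm.data import Data
 
     class TimestampedData(Data):
         @classproperty
         def _hash_ignored_attributes(cls):
             # 'timestamp' carries no physical content; hashing it would make two
             # otherwise identical nodes hash differently (a false negative)
             return super(TimestampedData, cls)._hash_ignored_attributes + ['timestamp']
 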
 It is important to understand that false negatives are **highly preferable**, because they only increase the runtime of your calculations, as if caching was disabled. False positives however can break the logic of your calculations. Be mindful of this when modifying the caching behaviour of your calculation and data classes.
 
+.. _caching_error:
+
+What to do when caching is used when it shouldn't
+-------------------------------------------------
+
+In general, the caching mechanism should trigger only when the output of a calculation will be exactly the same as if it is run again. However, there might be some edge cases where this is not true.
+
+For example, if the parser is in a different python module than the calculation, the version number used in the hash will not change when the parser is updated. While the "correct" solution to this problem is to increase the version number of a calculation when the behavior of its parser changes, there might still be cases (e.g. during development) when you manually want to stop a particular node from being cached.
+
+In such cases, you can follow these steps to disable caching:
+
+1. If you suspect that a node has been cached in error, check that it has a ``_aiida_cached_from`` extra. If that's not the case, it is not a problem of caching.
+2. Get all nodes which match your node, and clear their hash:
+
+   .. code:: python
+
+       for n in node.get_all_same_nodes():
+           n.clear_hash()
+3. Run your calculation again. Now it should not use caching.
+
+If you instead think that there is a bug in the AiiDA implementation, please open an issue (with enough information to be able to reproduce the error, otherwise it is hard for us to help you) in the AiiDA GitHub repository: https://github.com/aiidateam/aiida_core/issues/new.
+
 .. _caching_provenance:
 
 Caching and the Provenance Graph

From fe1f5e8d9f7b7c940a1e63f86d955336d4ffbbe3 Mon Sep 17 00:00:00 2001
From: Dominik Gresch
Date: Thu, 1 Feb 2018 10:52:45 +0100
Subject: [PATCH 097/101] Add max_memory_kb and priority to the attributes
 ignored in JobCalculations

---
 aiida/orm/implementation/general/calculation/job/__init__.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/aiida/orm/implementation/general/calculation/job/__init__.py b/aiida/orm/implementation/general/calculation/job/__init__.py
index 3f39f93b4a..75eeb35a5c 100644
--- a/aiida/orm/implementation/general/calculation/job/__init__.py
+++ b/aiida/orm/implementation/general/calculation/job/__init__.py
@@ -43,7 +43,9 @@ def _hash_ignored_attributes(cls):
         # _updatable_attributes are ignored automatically.
         return super(AbstractJobCalculation, cls)._hash_ignored_attributes + [
             'queue_name',
+            'priority',
             'max_wallclock_seconds',
+            'max_memory_kb',
         ]
 
     def get_hash(

From b7c1e71ac1f9a926d75afd8be50c1470d7e475d8 Mon Sep 17 00:00:00 2001
From: Dominik Gresch
Date: Thu, 1 Feb 2018 10:58:20 +0100
Subject: [PATCH 098/101] Add description of '_hash_ignored_inputs'

---
 docs/source/caching/index.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/source/caching/index.rst b/docs/source/caching/index.rst
index 40f4c97718..cf5e7e699c 100644
--- a/docs/source/caching/index.rst
+++ b/docs/source/caching/index.rst
@@ -70,6 +70,7 @@ By default, this hash is created from:
 In the case of calculations, the hashes of the inputs are also included. When developing calculation and data classes, there are some methods you can use to determine how the hash is created:
 
 * To ignore specific attributes, a ``Node`` subclass can have a ``_hash_ignored_attributes`` attribute. This is a list of attribute names which are ignored when creating the hash.
+* For calculations, the ``_hash_ignored_inputs`` attribute lists inputs that should be ignored when creating the hash.
 * To add things which should be considered in the hash, you can override the :meth:`_get_objects_to_hash <.AbstractNode._get_objects_to_hash>` method. Note that doing so overrides the behavior described above, so you should make sure to use the ``super()`` method.
 * Pass a keyword argument to :meth:`.get_hash <.AbstractNode.get_hash>`. These are passed on to ``aiida.common.hashing.make_hash``. For example, the ``ignored_folder_content`` keyword is used by the :class:`JobCalculation <.AbstractJobCalculation>` to ignore the ``raw_input`` subfolder of its repository folder.

From 1b4839e5c9a96203d6f21defcee2a886d31a77b6 Mon Sep 17 00:00:00 2001
From: Dominik Gresch
Date: Thu, 1 Feb 2018 11:29:03 +0100
Subject: [PATCH 099/101] Add code to show how to get the full class name

---
 docs/source/caching/index.rst | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/docs/source/caching/index.rst b/docs/source/caching/index.rst
index cf5e7e699c..af7a8bdb4b 100644
--- a/docs/source/caching/index.rst
+++ b/docs/source/caching/index.rst
@@ -53,7 +53,18 @@ Of course, using caching would be quite tedious if you had to set ``use_cache``
     disabled:
         - aiida.orm.data.base.Float
 
-This means that caching is enabled for ``TemplatereplacerCalculation`` and ``Str``, and disabled for all other classes. In this example, manually disabling ``aiida.orm.data.base.Float`` is actually not needed, since the default value for all classes is already ``False``. Note also that the fully qualified class import name (e.g., ``aiida.orm.data.base.Str``) must be given, not just the class name (``Str``). This is to avoid accidentally matching classes with the same name.
+This means that caching is enabled for ``TemplatereplacerCalculation`` and ``Str``, and disabled for all other classes. In this example, manually disabling ``aiida.orm.data.base.Float`` is actually not needed, since the default value for all classes is already ``False``. Note also that the fully qualified class import name (e.g., ``aiida.orm.data.base.Str``) must be given, not just the class name (``Str``). This is to avoid accidentally matching classes with the same name. You can get this name by combining the module name and class name, or (usually) from the string representation of the class:
+
+.. ipython::
+    :verbatim:
+
+    In [1]: Str.__module__ + '.' + Str.__name__
+    Out[1]: 'aiida.orm.data.base.Str'
+
+    In [2]: str(Str)
+    Out[2]: "<class 'aiida.orm.data.base.Str'>"
+
+Note that this is not the same as the type string stored in the database.
 
 How are cached nodes matched?
 -----------------------------

From 01f90043611666184967ec3992d447ef1a0fb1e2 Mon Sep 17 00:00:00 2001
From: Dominik Gresch
Date: Thu, 1 Feb 2018 12:26:42 +0100
Subject: [PATCH 100/101] Remove workchain caching test, add caching section
 to dev guide

---
 aiida/backends/tests/work/workChain.py  | 50 -------------------------
 docs/source/caching/index.rst           |  4 +-
 docs/source/developer_guide/caching.rst | 12 ++++++
 docs/source/developer_guide/index.rst   |  1 +
 4 files changed, 16 insertions(+), 51 deletions(-)
 create mode 100644 docs/source/developer_guide/caching.rst

diff --git a/aiida/backends/tests/work/workChain.py b/aiida/backends/tests/work/workChain.py
index c6f3fe008d..a54fed4af2 100644
--- a/aiida/backends/tests/work/workChain.py
+++ b/aiida/backends/tests/work/workChain.py
@@ -361,56 +361,6 @@ def _run_with_checkpoints(self, wf_class, inputs=None):
         return finished_steps
 
-class TestFastForwardingWorkChain(TestWorkchain):
-    def setUp(self):
-        super(TestFastForwardingWorkChain, self).setUp()
-        class ReturnInputsFastForward(WorkChain):
-            @classmethod
-            def define(cls, spec):
-                super(ReturnInputsFastForward, cls).define(spec)
-                spec.input('a', valid_type=Int)
-                spec.input('b', valid_type=Int, default=Int(2), required=False)
-                spec.outline(cls.return_inputs)
-                spec.deterministic()
-
-            def return_inputs(self):
-                self.out('a', self.inputs.a)
-                self.out('b', self.inputs.b)
-        self.wf_class = ReturnInputsFastForward
-        self.reference_result, self.reference_pid = run(
-            self.wf_class, a=Int(1), b=Int(2), _return_pid=True
-        )
-        self.reference_wc = load_node(self.reference_pid)
-
-        class ReturnInputsFastForward(WorkChain):
-            @classmethod
-            def define(cls, spec):
-                super(ReturnInputsFastForward, cls).define(spec)
-                spec.input('a', valid_type=Int)
-                spec.input('b', valid_type=Int, default=Int(2), required=False)
-                spec.outline(cls.return_inputs)
-                spec.deterministic()
-
-            def return_inputs(self):
-                raise ValueError
-
-        self.wf_class_broken = ReturnInputsFastForward
-
-    def tearDown(self):
-        super(TestFastForwardingWorkChain, self).tearDown()
-        super(TestFastForwardingWorkChain, self).tearDownClass()
-        super(TestFastForwardingWorkChain, self).setUpClass()
-
-    def test_hash(self):
-        res, pid = run(
-            self.wf_class,
-            a=Int(1), b=Int(2),
-            _use_cache=True, _return_pid=True
-        )
-        wc = load_node(pid)
-        self.assertEquals(wc.get_hash(), self.reference_wc.get_hash())
-        self.assertNotEquals(wc.get_hash(), None)
-
 class TestWorkchainWithOldWorkflows(AiidaTestCase):
 
diff --git a/docs/source/caching/index.rst b/docs/source/caching/index.rst
index af7a8bdb4b..e730dbcaca 100644
--- a/docs/source/caching/index.rst
+++ b/docs/source/caching/index.rst
@@ -53,7 +53,7 @@ Of course, using caching would be quite tedious if you had to set ``use_cache``
     disabled:
         - aiida.orm.data.base.Float
 
-This means that caching is enabled for ``TemplatereplacerCalculation`` and ``Str``, and disabled for all other classes. In this example, manually disabling ``aiida.orm.data.base.Float`` is actually not needed, since the default value for all classes is already ``False``. Note also that the fully qualified class import name (e.g., ``aiida.orm.data.base.Str``) must be given, not just the class name (``Str``). This is to avoid accidentally matching classes with the same name. You can get this name by combining the module name and class name, or (usually) from the string representation of the class:
+This means that caching is enabled for ``TemplatereplacerCalculation`` and ``Str``, and disabled for all other classes. In this example, manually disabling ``aiida.orm.data.base.Float`` is actually not needed, since the ``default: False`` configuration means that caching is disabled for all classes unless it is manually enabled. Note also that the fully qualified class import name (e.g., ``aiida.orm.data.base.Str``) must be given, not just the class name (``Str``). This is to avoid accidentally matching classes with the same name. You can get this name by combining the module name and class name, or (usually) from the string representation of the class:
 
 .. ipython::
     :verbatim:
 
     In [1]: Str.__module__ + '.' + Str.__name__
     Out[1]: 'aiida.orm.data.base.Str'
@@ -66,6 +66,8 @@ This means that caching is enabled for ``TemplatereplacerCalculation`` and ``Str
 
 Note that this is not the same as the type string stored in the database.
 
+.. _caching_matches:
+
 How are cached nodes matched?
 -----------------------------
 
diff --git a/docs/source/developer_guide/caching.rst b/docs/source/developer_guide/caching.rst
new file mode 100644
index 0000000000..9cca41f04b
--- /dev/null
+++ b/docs/source/developer_guide/caching.rst
@@ -0,0 +1,12 @@
+Caching: implementation details
++++++++++++++++++++++++++++++++
+
+This section covers some details of the caching mechanism which are not discussed in the :ref:`user guide <caching>`. If you are developing a plugin and want to modify the caching behavior of your classes, we recommend you read :ref:`this section <caching_matches>` first.
+
+Disabling caching for ``WorkCalculation``
+-----------------------------------------
+
+As discussed in the :ref:`user guide <caching_provenance>`, nodes which can have ``RETURN`` links cannot be cached. This is enforced on two levels:
+
+* The ``_cacheable`` property is set to ``False`` in the :class:`.AbstractCalculation`, and only re-enabled in :class:`.AbstractJobCalculation` and :class:`InlineCalculation <.general.calculation.inline.InlineCalculation>`. This means that a ``WorkCalculation`` will not be cached.
+* The ``_store_from_cache`` method, which is used to "clone" an existing node, will raise an error if the existing node has any ``RETURN`` links. This extra safeguard prevents cases where a user might incorrectly override the ``_cacheable`` property on a ``WorkCalculation`` subclass.
diff --git a/docs/source/developer_guide/index.rst b/docs/source/developer_guide/index.rst
index 5375e00e0b..fec4a139ae 100644
--- a/docs/source/developer_guide/index.rst
+++ b/docs/source/developer_guide/index.rst
@@ -22,3 +22,4 @@ Developer's guide
     ../verdi/properties
     database_schema
     control/index
+    caching

From 8596f7758dac973c890e2b86dd61a00cada5acd6 Mon Sep 17 00:00:00 2001
From: Dominik Gresch
Date: Thu, 8 Feb 2018 03:03:30 +0100
Subject: [PATCH 101/101] Add 'retrieve_temporary_list' to updatable
 attributes

---
 aiida/orm/implementation/general/calculation/job/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aiida/orm/implementation/general/calculation/job/__init__.py b/aiida/orm/implementation/general/calculation/job/__init__.py
index 75eeb35a5c..6797a43460 100644
--- a/aiida/orm/implementation/general/calculation/job/__init__.py
+++ b/aiida/orm/implementation/general/calculation/job/__init__.py
@@ -84,7 +84,7 @@ def _init_internal_params(self):
         'state', 'job_id', 'scheduler_state',
         'scheduler_lastchecktime',
         'last_jobinfo', 'remote_workdir', 'retrieve_list',
-        'retrieve_singlefile_list'
+        'retrieve_singlefile_list', 'retrieve_temporary_list'
     )
 
     # Files in which the scheduler output and error will be stored.