-
Notifications
You must be signed in to change notification settings - Fork 200
/
Copy pathdeserialization.py
280 lines (253 loc) · 13.2 KB
/
deserialization.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
# -*- coding: utf-8 -*-
###########################################################################
# Copyright (c), The AiiDA team. All rights reserved. #
# This file is part of the AiiDA code. #
# #
# The code is hosted on GitHub at https://github.com/aiidateam/aiida_core #
# For further information on the license, see the LICENSE.txt file #
# For further information please visit http://www.aiida.net #
###########################################################################
from aiida.common.exceptions import AiidaException
from aiida.utils.timezone import is_naive, make_aware, get_current_timezone
import json
class DeserializationException(AiidaException):
    """Raised when stored attribute data cannot be deserialized."""
# Maps each scalar datatype to the field of the item that holds its value.
_SCALAR_VALUE_FIELDS = {
    'bool': 'bval',
    'int': 'ival',
    'float': 'fval',
    'txt': 'tval',
}


def _item_key(mainitem):
    """Return the item's 'key' for error messages, tolerating its absence."""
    # Nested items may reach this function without a 'key' entry; a
    # placeholder keeps the real error from being masked by a KeyError.
    try:
        return mainitem['key']
    except KeyError:
        return '<unnamed subitem>'


def _origin_strings(original_class, original_pk):
    """Return the (source, subspecifier) strings used in error messages."""
    if original_class is None:
        return "the data passed", ""
    field_name = original_class._subspecifier_field_name
    if field_name is None:
        return original_class.__name__, ""
    return (original_class.__name__,
            "{}={} and ".format(field_name, original_pk))


def _report_inconsistency(msg, lesserrors):
    """Print ``msg`` if ``lesserrors`` is set, otherwise raise it."""
    if not lesserrors:
        raise DeserializationException(msg)
    # NOTE: the original code hinted that this should eventually go through
    # the AiiDA logger; printing is kept to preserve the current behaviour.
    print(msg)


def _deserialize_children(firstlevelsubdict, subitems, sep, original_class,
                          original_pk, lesserrors):
    """Recursively deserialize every first-level subitem into a dict."""
    children = {}
    for firstsubk, firstsubv in firstlevelsubdict.items():
        # Collect the descendants of this child, with its prefix stripped
        child_prefix = firstsubk + sep
        newsubitems = {k[len(child_prefix):]: v
                       for k, v in subitems.items()
                       if k.startswith(child_prefix)}
        children[firstsubk] = _deserialize_attribute(
            mainitem=firstsubv, subitems=newsubitems, sep=sep,
            original_class=original_class, original_pk=original_pk,
            lesserrors=lesserrors)
    return children


def _deserialize_attribute(mainitem, subitems, sep, original_class=None,
                           original_pk=None, lesserrors=False):
    """
    Deserialize a single attribute.

    :param mainitem: the main item (either the attribute itself for base
        types (None, string, ...) or the main item for lists and dicts.
        Must contain the 'datatype' key and the value keys relevant for
        that datatype (tval, fval, ival, bval, dval). The 'key' key is used
        only to build error messages.
        NOTE that a type check is not performed! tval is expected to be a
        string, dval a date, etc.
    :param subitems: must be a dictionary of dictionaries. In the top-level
        dictionary, the key must be the key of the attribute, stripped of
        all prefixes (i.e., if the mainitem has key 'a.b' and we pass
        subitems 'a.b.0', 'a.b.1', 'a.b.1.c', their keys must be '0', '1',
        '1.c').
        It must be None (or empty) if the value is not iterable (int, str,
        float, ...).
        It is an empty dictionary if there are no subitems.
    :param sep: a string, the separator between subfields (to separate the
        name of a dictionary from the keys it contains, for instance)
    :param original_class: if these elements come from a specific subclass
        of DbMultipleValueAttributeBaseClass, pass here the class (note: the
        class, not the instance!). This is used only in case the wrong
        number of elements is found in the raw data, to print a more
        meaningful message (if the class has a dbnode associated to it)
    :param original_pk: if the elements come from a specific subclass
        of DbMultipleValueAttributeBaseClass that has a dbnode associated to
        it, pass here the PK integer. This is used only in case the wrong
        number of elements is found in the raw data, to print a more
        meaningful message
    :param lesserrors: If set to True, in some cases where the content of
        the DB is not consistent but data is still recoverable, it will
        just print the message rather than raising an exception (e.g. if
        the number of elements of a dictionary is different from the number
        declared in the ival field). The flag also applies to nested items.

    :return: the deserialized value
    :raise DeserializationException: if an error occurs
    """
    datatype = mainitem['datatype']

    # --- Base (non-iterable) types: they must not have any subitem
    if datatype in ('none', 'date') or datatype in _SCALAR_VALUE_FIELDS:
        if subitems:
            raise DeserializationException(
                "'{}' is of a base type, but has subitems!".format(
                    _item_key(mainitem)))
        if datatype == 'none':
            return None
        if datatype == 'date':
            date_value = mainitem['dval']
            # Always hand back timezone-aware datetimes
            if is_naive(date_value):
                return make_aware(date_value, get_current_timezone())
            return date_value
        return mainitem[_SCALAR_VALUE_FIELDS[datatype]]

    if datatype == 'json':
        try:
            return json.loads(mainitem['tval'])
        except (ValueError, TypeError):
            # TypeError covers a tval that is not a string (e.g. None)
            raise DeserializationException(
                "Error in the content of the json field")

    if datatype not in ('list', 'dict'):
        raise DeserializationException(
            "The type field '{}' is not recognized".format(datatype))

    # --- Containers
    # A missing subitems mapping is treated as "no subitems"
    subitems = subitems or {}
    # subitems contains all subitems, here I store only those of
    # deepness 1, i.e. if I have subitems '0', '1' and '1.c' I
    # store only '0' and '1'
    firstlevelsubdict = {k: v for k, v in subitems.items() if sep not in k}

    if datatype == 'list':
        expected_set = set("{:d}".format(i) for i in range(mainitem['ival']))
        received_set = set(firstlevelsubdict)
        if expected_set != received_set:
            sourcestr, subspecifier_string = _origin_strings(
                original_class, original_pk)
            msg = ("Wrong list elements stored in {} for "
                   "{}key='{}' ({} vs {})".format(
                       sourcestr, subspecifier_string, _item_key(mainitem),
                       expected_set, received_set))
            # Missing elements are never recoverable; extra elements are
            # tolerated (and only reported) when lesserrors is set.
            if not expected_set.issubset(received_set):
                raise DeserializationException(msg)
            _report_inconsistency(msg, lesserrors)

        children = _deserialize_children(
            firstlevelsubdict, subitems, sep, original_class, original_pk,
            lesserrors)
        # Rebuild the list in index order, ignoring any unexpected extra
        return [children["{:d}".format(i)] for i in range(mainitem['ival'])]

    # datatype == 'dict'
    if len(firstlevelsubdict) != mainitem['ival']:
        sourcestr, subspecifier_string = _origin_strings(
            original_class, original_pk)
        msg = ("Wrong dict length stored in {} for "
               "{}key='{}' ({} vs {})".format(
                   sourcestr, subspecifier_string, _item_key(mainitem),
                   len(firstlevelsubdict), mainitem['ival']))
        _report_inconsistency(msg, lesserrors)

    return _deserialize_children(
        firstlevelsubdict, subitems, sep, original_class, original_pk,
        lesserrors)
def deserialize_attributes(data, sep, original_class=None, original_pk=None):
    """
    Deserialize the attributes from the format internally stored in the DB
    to the actual format (dictionaries, lists, integers, ...).

    :param data: must be a dictionary of dictionaries. In the top-level
        dictionary, the key must be the key of the attribute. The value must
        be a dictionary with the following keys: datatype, tval, fval, ival,
        bval, dval. Other keys are ignored.
        NOTE that a type check is not performed! tval is expected to be a
        string, dval a date, etc.
    :param sep: a string, the separator between subfields (to separate the
        name of a dictionary from the keys it contains, for instance)
    :param original_class: if these elements come from a specific subclass
        of DbMultipleValueAttributeBaseClass, pass here the class (note: the
        class, not the instance!). This is used only in case the wrong
        number of elements is found in the raw data, to print a more
        meaningful message (if the class has a dbnode associated to it)
    :param original_pk: if the elements come from a specific subclass
        of DbMultipleValueAttributeBaseClass that has a dbnode associated to
        it, pass here the PK integer. This is used only in case the wrong
        number of elements is found in the raw data, to print a more
        meaningful message

    :return: a dictionary, where for each entry the corresponding value is
        returned, deserialized back to lists, dictionaries, etc.
        Example: if ``data = {'a': {'datatype': "list", "ival": 2, ...},
        'a.0': {'datatype': "int", "ival": 2, ...},
        'a.1': {'datatype': "txt", "tval": "yy"}}``,
        it will return ``{"a": [2, "yy"]}``
    :raise DeserializationException: if some subitems have no corresponding
        base item, or if a single attribute cannot be deserialized
    """
    from collections import defaultdict

    # I group results by zero-level entity
    found_mainitems = {}
    found_subitems = defaultdict(dict)
    for mainkey, descriptiondict in data.items():
        prefix, thissep, postfix = mainkey.partition(sep)
        if thissep:
            # A subitem: store it under its zero-level key, without 'key'
            found_subitems[prefix][postfix] = {
                k: v for k, v in descriptiondict.items() if k != "key"}
        else:
            # A zero-level item: work on a copy, so that the caller's
            # dictionary is not modified when setting 'key'
            mainitem = descriptiondict.copy()
            mainitem['key'] = prefix
            found_mainitems[prefix] = mainitem

    # There can be mainitems without subitems, but there should not be
    # subitems without mainitems.
    lone_subitems = set(found_subitems) - set(found_mainitems)
    if lone_subitems:
        # Sorted, so that the error message is deterministic
        raise DeserializationException(
            "Missing base keys for the following items: {}".format(
                ",".join(sorted(lone_subitems))))

    # For each zero-level entity, I call the _deserialize_attribute function
    retval = {}
    for k, v in found_mainitems.items():
        # Items without subitems get an empty dictionary
        retval[k] = _deserialize_attribute(
            mainitem=v, subitems=found_subitems.get(k, {}), sep=sep,
            original_class=original_class, original_pk=original_pk)
    return retval