From 1b0db2e7d2762b8e6887718e5e6d8b4208048a85 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Tue, 24 Dec 2019 10:47:48 +0100 Subject: [PATCH] Fix decoding-encoding tests - Refine equivalence checks (exclude lossy or unordered decode) - Fix for default converter, AbderaConverter and JsonMLConverter on decode of xs:anyType --- CHANGELOG.rst | 8 +++ doc/conf.py | 2 +- publiccode.yml | 4 +- setup.py | 2 +- xmlschema/__init__.py | 2 +- xmlschema/converters.py | 49 ++++++++++--------- xmlschema/etree.py | 7 +-- .../tests/test_factory/validation_tests.py | 39 ++++++++++----- xmlschema/tests/test_w3c_suite.py | 1 + xmlschema/validators/complex_types.py | 20 +++++--- xmlschema/validators/elements.py | 4 ++ 11 files changed, 90 insertions(+), 48 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index e11b9ffa..da3c2bcc 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,13 @@ CHANGELOG ********* +`v1.0.18`_ (2019-12-24) +======================= +* Fix for *ModelVisitor.iter_unordered_content()* +* Fixed default converter, AbderaConverter and JsonMLConverter for xs:anyType decode +* Fixed validation tests with all converters +* Added UnorderedConverter to validation tests + `v1.0.17`_ (2019-12-22) ======================= * Enhancement of validation-only speed (~15%) @@ -278,3 +285,4 @@ v0.9.6 (2017-05-05) .. _v1.0.15: https://github.com/brunato/xmlschema/compare/v1.0.14...v1.0.15 .. _v1.0.16: https://github.com/brunato/xmlschema/compare/v1.0.15...v1.0.16 .. _v1.0.17: https://github.com/brunato/xmlschema/compare/v1.0.16...v1.0.17 +.. _v1.0.18: https://github.com/brunato/xmlschema/compare/v1.0.17...v1.0.18 diff --git a/doc/conf.py b/doc/conf.py index 53a49434..f39d25ed 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -62,7 +62,7 @@ # The short X.Y version. version = '1.0' # The full version, including alpha/beta/rc tags. -release = '1.0.17' +release = '1.0.18' # The language for content autogenerated by Sphinx. 
Refer to documentation # for a list of supported languages. diff --git a/publiccode.yml b/publiccode.yml index 77d0259e..2abad5ef 100644 --- a/publiccode.yml +++ b/publiccode.yml @@ -6,8 +6,8 @@ publiccodeYmlVersion: '0.2' name: xmlschema url: 'https://github.com/sissaschool/xmlschema' landingURL: 'https://github.com/sissaschool/xmlschema' -releaseDate: '2019-12-22' -softwareVersion: v1.0.17 +releaseDate: '2019-12-24' +softwareVersion: v1.0.18 developmentStatus: stable platforms: - linux diff --git a/setup.py b/setup.py index 520d5c76..ef312afc 100755 --- a/setup.py +++ b/setup.py @@ -38,7 +38,7 @@ def run(self): setup( name='xmlschema', - version='1.0.17', + version='1.0.18', setup_requires=['elementpath~=1.3.0'], install_requires=['elementpath~=1.3.0'], packages=['xmlschema'], diff --git a/xmlschema/__init__.py b/xmlschema/__init__.py index 346076a6..e56cc625 100644 --- a/xmlschema/__init__.py +++ b/xmlschema/__init__.py @@ -31,7 +31,7 @@ XsdGlobals, XMLSchemaBase, XMLSchema, XMLSchema10, XMLSchema11 ) -__version__ = '1.0.17' +__version__ = '1.0.18' __author__ = "Davide Brunato" __contact__ = "brunato@sissa.it" __copyright__ = "Copyright 2016-2019, SISSA" diff --git a/xmlschema/converters.py b/xmlschema/converters.py index f8b7bfc2..84a17e36 100644 --- a/xmlschema/converters.py +++ b/xmlschema/converters.py @@ -283,21 +283,25 @@ def element_decode(self, data, xsd_element, level=0): has_single_group = xsd_element.type.content_type.is_single() list_types = list if self.list is list else (self.list, list) - for name, value, xsd_child in self.map_content(data.content): - try: - result = result_dict[name] - except KeyError: - if xsd_child is None or has_single_group and xsd_child.is_single(): - result_dict[name] = self.list([value]) if self.force_list else value - else: - result_dict[name] = self.list([value]) - else: - if not isinstance(result, list_types) or not result: - result_dict[name] = self.list([result, value]) - elif isinstance(result[0], list_types) or not 
isinstance(value, list_types): - result.append(value) + if data.content: + for name, value, xsd_child in self.map_content(data.content): + try: + result = result_dict[name] + except KeyError: + if xsd_child is None or has_single_group and xsd_child.is_single(): + result_dict[name] = self.list([value]) if self.force_list else value + else: + result_dict[name] = self.list([value]) else: - result_dict[name] = self.list([result, value]) + if not isinstance(result, list_types) or not result: + result_dict[name] = self.list([result, value]) + elif isinstance(result[0], list_types) or not isinstance(value, list_types): + result.append(value) + else: + result_dict[name] = self.list([result, value]) + + elif data.text is not None and data.text != '': + result_dict[self.text_key] = data.text if level == 0 and self.preserve_root: return self.dict([(self.map_qname(data.tag), result_dict if result_dict else None)]) @@ -771,7 +775,7 @@ def __setattr__(self, name, value): @property def lossy(self): - return True + return True # Loss cdata parts def element_decode(self, data, xsd_element, level=0): if xsd_element.type.is_simple() or xsd_element.type.has_simple_content(): @@ -792,7 +796,7 @@ def element_decode(self, data, xsd_element, level=0): except AttributeError: children[name] = self.list([children[name], value]) if not children: - children = None + children = data.text if data.text is not None and data.text != '' else None if data.attributes: if children != []: @@ -890,24 +894,25 @@ def losslessly(self): return True def element_decode(self, data, xsd_element, level=0): - result_list = self.list([self.map_qname(data.tag)]) - attributes = self.dict([(k, v) for k, v in self.map_attributes(data.attributes)]) + result_list = self.list() + result_list.append(self.map_qname(data.tag)) + if data.text is not None and data.text != '': + result_list.append(data.text) - if xsd_element.type.is_simple() or xsd_element.type.has_simple_content(): - if data.text is not None and data.text != 
'': - result_list.append(data.text) - else: + if not xsd_element.type.has_simple_content(): result_list.extend([ value if value is not None else self.list([name]) for name, value, _ in self.map_content(data.content) ]) + attributes = self.dict([(k, v) for k, v in self.map_attributes(data.attributes)]) if level == 0 and xsd_element.is_global() and not self.strip_namespaces and self: attributes.update( [('xmlns:%s' % k if k else 'xmlns', v) for k, v in self._namespaces.items()] ) if attributes: result_list.insert(1, attributes) + return result_list def element_encode(self, obj, xsd_element, level=0): diff --git a/xmlschema/etree.py b/xmlschema/etree.py index ae172d7a..6701ad09 100644 --- a/xmlschema/etree.py +++ b/xmlschema/etree.py @@ -289,8 +289,8 @@ def etree_elements_assert_equal(elem, other, strict=True, skip_comments=True, un _REGEX_SPACES = re.compile(r'\s+') if unordered: - children = sorted(elem, key=lambda x: x.tag is lxml_etree_comment or x.tag) - other_children = iter(sorted(other, key=lambda x: x.tag is lxml_etree_comment or x.tag)) + children = sorted(elem, key=lambda x: '' if x.tag is lxml_etree_comment else x.tag) + other_children = iter(sorted(other, key=lambda x: '' if x.tag is lxml_etree_comment else x.tag)) else: children = elem other_children = iter(other) @@ -309,7 +309,8 @@ def etree_elements_assert_equal(elem, other, strict=True, skip_comments=True, un assert e1.tag == e2.tag, "%r != %r: tags differ." % (e1, e2) else: namespace = get_namespace(e1.tag) or namespace - assert get_qname(namespace, e1.tag) == get_qname(namespace, e2.tag), "%r != %r: tags differ." % (e1, e2) + assert get_qname(namespace, e1.tag) == get_qname(namespace, e2.tag), \ + "%r != %r: tags differ." 
% (e1, e2) # Attributes if e1.attrib != e2.attrib: diff --git a/xmlschema/tests/test_factory/validation_tests.py b/xmlschema/tests/test_factory/validation_tests.py index da01ca19..0c819cb9 100644 --- a/xmlschema/tests/test_factory/validation_tests.py +++ b/xmlschema/tests/test_factory/validation_tests.py @@ -23,7 +23,6 @@ from xmlschema.compat import unicode_type, ordered_dict_class from xmlschema.etree import etree_tostring, ElementTree, \ etree_elements_assert_equal, lxml_etree, lxml_etree_element -from xmlschema.qnames import XSI_TYPE from xmlschema.resources import fetch_namespaces from xmlschema.tests import XsdValidatorTestCase @@ -92,7 +91,11 @@ def setUpClass(cls): def check_etree_encode(self, root, converter=None, **kwargs): namespaces = kwargs.get('namespaces', {}) - unordered = converter is UnorderedConverter or kwargs.get('unordered', False) + + lossy = converter in (ParkerConverter, AbderaConverter) + losslessly = converter is JsonMLConverter + unordered = converter not in (AbderaConverter, JsonMLConverter) or \ + kwargs.get('unordered', False) data1 = self.schema.decode(root, converter=converter, **kwargs) if isinstance(data1, tuple): @@ -124,14 +127,16 @@ def check_etree_encode(self, root, converter=None, **kwargs): # If the check fails retry only if the converter is lossy (eg. ParkerConverter) # or if the XML case has defaults taken from the schema or some part of data # decoding is skipped by schema wildcards (set the specific argument in testfiles). 
- if converter not in (ParkerConverter, AbderaConverter, JsonMLConverter) and not skip_strict: + if skip_strict: + pass # can't ensure encode equivalence if the test case uses defaults + elif lossy: + pass # can't check encode equivalence if the converter is lossy + elif losslessly: if debug_mode: pdb.set_trace() raise AssertionError(str(err) + msg_tmpl % "encoded tree differs from original") - elif converter is ParkerConverter and any(XSI_TYPE in e.attrib for e in root.iter()): - return # can't check encode equivalence if xsi:type is provided else: - # Lossy or augmenting cases are checked after another decoding/encoding pass + # Lossy or augmenting cases are checked with another decoding/encoding pass data2 = self.schema.decode(elem1, converter=converter, **kwargs) if isinstance(data2, tuple): data2 = data2[0] @@ -157,6 +162,10 @@ def check_etree_encode(self, root, converter=None, **kwargs): raise AssertionError(str(err) + msg_tmpl % "encoded tree differs after second pass") def check_json_serialization(self, root, converter=None, **kwargs): + lossy = converter in (ParkerConverter, AbderaConverter) + unordered = converter not in (AbderaConverter, JsonMLConverter) or \ + kwargs.get('unordered', False) + data1 = xmlschema.to_json(root, schema=self.schema, converter=converter, **kwargs) if isinstance(data1, tuple): data1 = data1[0] @@ -169,17 +178,22 @@ def check_json_serialization(self, root, converter=None, **kwargs): if isinstance(data2, tuple): data2 = data2[0] - if converter is ParkerConverter and any(XSI_TYPE in e.attrib for e in root.iter()): - return # can't check encode equivalence if xsi:type is provided - elif sys.version_info >= (3, 6): + if data2 != data1 and (skip_strict or lossy or unordered): + # Can't ensure decode equivalence if the test case uses defaults, + # or the converter is lossy or the decoding is unordered. 
+ return + + if sys.version_info >= (3, 6): + if data1 != data2: + print(data1) + print(data2) + print(converter, unordered) self.assertEqual(data2, data1, msg_tmpl % "serialized data changed at second pass") else: elem2 = xmlschema.from_json(data2, schema=self.schema, path=root.tag, converter=converter, **kwargs) if isinstance(elem2, tuple): elem2 = elem2[0] - unordered = converter is UnorderedConverter or kwargs.get('unordered') - try: self.assertIsNone(etree_elements_assert_equal( elem1, elem2, strict=False, skip_comments=True, unordered=unordered @@ -263,6 +277,7 @@ def check_encoding_with_element_tree(self): self.check_json_serialization(root, BadgerFishConverter, **options) self.check_json_serialization(root, AbderaConverter, **options) self.check_json_serialization(root, JsonMLConverter, **options) + self.check_json_serialization(root, UnorderedConverter, **options) def check_decoding_and_encoding_with_lxml(self): xml_tree = lxml_etree.parse(xml_file) @@ -297,6 +312,7 @@ def check_decoding_and_encoding_with_lxml(self): self.check_etree_encode(root, BadgerFishConverter, **options) self.check_etree_encode(root, AbderaConverter, **options) self.check_etree_encode(root, JsonMLConverter, **options) + self.check_etree_encode(root, UnorderedConverter, cdata_prefix='#', **options) options.pop('dict_class') self.check_json_serialization(root, cdata_prefix='#', **options) @@ -306,6 +322,7 @@ def check_decoding_and_encoding_with_lxml(self): self.check_json_serialization(root, BadgerFishConverter, **options) self.check_json_serialization(root, AbderaConverter, **options) self.check_json_serialization(root, JsonMLConverter, **options) + self.check_json_serialization(root, UnorderedConverter, **options) def check_validate_and_is_valid_api(self): if expected_errors: diff --git a/xmlschema/tests/test_w3c_suite.py b/xmlschema/tests/test_w3c_suite.py index f695af3a..e6212ee2 100644 --- a/xmlschema/tests/test_w3c_suite.py +++ b/xmlschema/tests/test_w3c_suite.py @@ -101,6 
+101,7 @@ '../msData/additional/test93490_8.xml', # 4799: Idem '../msData/datatypes/gMonth002.xml', # 8017: gMonth bogus: conflicts with other invalid schema tests '../msData/datatypes/gMonth004.xml', # 8019: (http://www.w3.org/Bugs/Public/show_bug.cgi?id=6901) + '../wgData/sg/e1.xml', # 14896: wrong href for valid instanceTest name="e1bis.xml" # Valid XML tests '../ibmData/instance_invalid/S3_4_2_4/s3_4_2_4ii03.xml', # defaultAttributeApply is true (false in comment) diff --git a/xmlschema/validators/complex_types.py b/xmlschema/validators/complex_types.py index a33649f0..be33dd37 100644 --- a/xmlschema/validators/complex_types.py +++ b/xmlschema/validators/complex_types.py @@ -17,9 +17,8 @@ XSD_SIMPLE_CONTENT, XSD_ANY_SIMPLE_TYPE, XSD_OPEN_CONTENT, XSD_ASSERT, \ get_qname, local_name from ..helpers import get_xsd_derivation_attribute -from ..converters import ElementData -from .exceptions import XMLSchemaValidationError, XMLSchemaDecodeError +from .exceptions import XMLSchemaDecodeError from .xsdbase import XsdComponent, XsdType, ValidationMixin from .assertions import XsdAssert from .attributes import XsdAttributeGroup @@ -608,12 +607,19 @@ def iter_encode(self, obj, validation='lax', **kwargs): xsd_element.type = self if isinstance(value, list): - for item in value: - for result in xsd_element.iter_encode(item, validation, **kwargs): + try: + results = [x for item in value for x in xsd_element.iter_encode( + item, validation, **kwargs + )] + except XMLSchemaValueError: + pass + else: + for result in results: yield result - else: - for result in xsd_element.iter_encode(value, validation, **kwargs): - yield result + return + + for result in xsd_element.iter_encode(value, validation, **kwargs): + yield result class Xsd11ComplexType(XsdComplexType): diff --git a/xmlschema/validators/elements.py b/xmlschema/validators/elements.py index a9e336f9..e51cede1 100644 --- a/xmlschema/validators/elements.py +++ b/xmlschema/validators/elements.py @@ -589,6 +589,10 @@ def 
iter_decode(self, elem, validation='lax', **kwargs): yield self.validation_error(validation, result, elem, **kwargs) else: content = result + + if len(content) == 1 and content[0][0] == 1: + value, content = content[0][1], None + else: if len(elem) and validation != 'skip': reason = "a simple content element can't has child elements."