Skip to content

Commit

Permalink
Fix decoding-encoding tests
Browse files Browse the repository at this point in the history
  - Refine equivalence checks (exclude lossy or unordered decode)
  - Fix for default converter, AbderaConverter and JsonMLConverter
    on decode of xs:anyType
  • Loading branch information
brunato committed Dec 24, 2019
1 parent a17f385 commit 1b0db2e
Show file tree
Hide file tree
Showing 11 changed files with 90 additions and 48 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@
CHANGELOG
*********

`v1.0.18`_ (2019-12-24)
=======================
* Fix for *ModelVisitor.iter_unordered_content()*
* Fixed default converter, AbderaConverter and JsonMLConverter for xs:anyType decode
* Fixed validation tests with all converters
* Added UnorderedConverter to validation tests

`v1.0.17`_ (2019-12-22)
=======================
* Enhancement of validation-only speed (~15%)
Expand Down Expand Up @@ -278,3 +285,4 @@ v0.9.6 (2017-05-05)
.. _v1.0.15: https://github.com/brunato/xmlschema/compare/v1.0.14...v1.0.15
.. _v1.0.16: https://github.com/brunato/xmlschema/compare/v1.0.15...v1.0.16
.. _v1.0.17: https://github.com/brunato/xmlschema/compare/v1.0.16...v1.0.17
.. _v1.0.18: https://github.com/brunato/xmlschema/compare/v1.0.17...v1.0.18
2 changes: 1 addition & 1 deletion doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
# The short X.Y version.
version = '1.0'
# The full version, including alpha/beta/rc tags.
release = '1.0.17'
release = '1.0.18'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
4 changes: 2 additions & 2 deletions publiccode.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ publiccodeYmlVersion: '0.2'
name: xmlschema
url: 'https://github.com/sissaschool/xmlschema'
landingURL: 'https://github.com/sissaschool/xmlschema'
releaseDate: '2019-12-22'
softwareVersion: v1.0.17
releaseDate: '2019-12-24'
softwareVersion: v1.0.18
developmentStatus: stable
platforms:
- linux
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def run(self):

setup(
name='xmlschema',
version='1.0.17',
version='1.0.18',
setup_requires=['elementpath~=1.3.0'],
install_requires=['elementpath~=1.3.0'],
packages=['xmlschema'],
Expand Down
2 changes: 1 addition & 1 deletion xmlschema/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
XsdGlobals, XMLSchemaBase, XMLSchema, XMLSchema10, XMLSchema11
)

__version__ = '1.0.17'
__version__ = '1.0.18'
__author__ = "Davide Brunato"
__contact__ = "brunato@sissa.it"
__copyright__ = "Copyright 2016-2019, SISSA"
Expand Down
49 changes: 27 additions & 22 deletions xmlschema/converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,21 +283,25 @@ def element_decode(self, data, xsd_element, level=0):

has_single_group = xsd_element.type.content_type.is_single()
list_types = list if self.list is list else (self.list, list)
for name, value, xsd_child in self.map_content(data.content):
try:
result = result_dict[name]
except KeyError:
if xsd_child is None or has_single_group and xsd_child.is_single():
result_dict[name] = self.list([value]) if self.force_list else value
else:
result_dict[name] = self.list([value])
else:
if not isinstance(result, list_types) or not result:
result_dict[name] = self.list([result, value])
elif isinstance(result[0], list_types) or not isinstance(value, list_types):
result.append(value)
if data.content:
for name, value, xsd_child in self.map_content(data.content):
try:
result = result_dict[name]
except KeyError:
if xsd_child is None or has_single_group and xsd_child.is_single():
result_dict[name] = self.list([value]) if self.force_list else value
else:
result_dict[name] = self.list([value])
else:
result_dict[name] = self.list([result, value])
if not isinstance(result, list_types) or not result:
result_dict[name] = self.list([result, value])
elif isinstance(result[0], list_types) or not isinstance(value, list_types):
result.append(value)
else:
result_dict[name] = self.list([result, value])

elif data.text is not None and data.text != '':
result_dict[self.text_key] = data.text

if level == 0 and self.preserve_root:
return self.dict([(self.map_qname(data.tag), result_dict if result_dict else None)])
Expand Down Expand Up @@ -771,7 +775,7 @@ def __setattr__(self, name, value):

@property
def lossy(self):
return True
return True # Loses cdata parts

def element_decode(self, data, xsd_element, level=0):
if xsd_element.type.is_simple() or xsd_element.type.has_simple_content():
Expand All @@ -792,7 +796,7 @@ def element_decode(self, data, xsd_element, level=0):
except AttributeError:
children[name] = self.list([children[name], value])
if not children:
children = None
children = data.text if data.text is not None and data.text != '' else None

if data.attributes:
if children != []:
Expand Down Expand Up @@ -890,24 +894,25 @@ def losslessly(self):
return True

def element_decode(self, data, xsd_element, level=0):
result_list = self.list([self.map_qname(data.tag)])
attributes = self.dict([(k, v) for k, v in self.map_attributes(data.attributes)])
result_list = self.list()
result_list.append(self.map_qname(data.tag))
if data.text is not None and data.text != '':
result_list.append(data.text)

if xsd_element.type.is_simple() or xsd_element.type.has_simple_content():
if data.text is not None and data.text != '':
result_list.append(data.text)
else:
if not xsd_element.type.has_simple_content():
result_list.extend([
value if value is not None else self.list([name])
for name, value, _ in self.map_content(data.content)
])

attributes = self.dict([(k, v) for k, v in self.map_attributes(data.attributes)])
if level == 0 and xsd_element.is_global() and not self.strip_namespaces and self:
attributes.update(
[('xmlns:%s' % k if k else 'xmlns', v) for k, v in self._namespaces.items()]
)
if attributes:
result_list.insert(1, attributes)

return result_list

def element_encode(self, obj, xsd_element, level=0):
Expand Down
7 changes: 4 additions & 3 deletions xmlschema/etree.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,8 +289,8 @@ def etree_elements_assert_equal(elem, other, strict=True, skip_comments=True, un
_REGEX_SPACES = re.compile(r'\s+')

if unordered:
children = sorted(elem, key=lambda x: x.tag is lxml_etree_comment or x.tag)
other_children = iter(sorted(other, key=lambda x: x.tag is lxml_etree_comment or x.tag))
children = sorted(elem, key=lambda x: '' if x.tag is lxml_etree_comment else x.tag)
other_children = iter(sorted(other, key=lambda x: '' if x.tag is lxml_etree_comment else x.tag))
else:
children = elem
other_children = iter(other)
Expand All @@ -309,7 +309,8 @@ def etree_elements_assert_equal(elem, other, strict=True, skip_comments=True, un
assert e1.tag == e2.tag, "%r != %r: tags differ." % (e1, e2)
else:
namespace = get_namespace(e1.tag) or namespace
assert get_qname(namespace, e1.tag) == get_qname(namespace, e2.tag), "%r != %r: tags differ." % (e1, e2)
assert get_qname(namespace, e1.tag) == get_qname(namespace, e2.tag), \
"%r != %r: tags differ." % (e1, e2)

# Attributes
if e1.attrib != e2.attrib:
Expand Down
39 changes: 28 additions & 11 deletions xmlschema/tests/test_factory/validation_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
from xmlschema.compat import unicode_type, ordered_dict_class
from xmlschema.etree import etree_tostring, ElementTree, \
etree_elements_assert_equal, lxml_etree, lxml_etree_element
from xmlschema.qnames import XSI_TYPE
from xmlschema.resources import fetch_namespaces

from xmlschema.tests import XsdValidatorTestCase
Expand Down Expand Up @@ -92,7 +91,11 @@ def setUpClass(cls):

def check_etree_encode(self, root, converter=None, **kwargs):
namespaces = kwargs.get('namespaces', {})
unordered = converter is UnorderedConverter or kwargs.get('unordered', False)

lossy = converter in (ParkerConverter, AbderaConverter)
losslessly = converter is JsonMLConverter
unordered = converter not in (AbderaConverter, JsonMLConverter) or \
kwargs.get('unordered', False)

data1 = self.schema.decode(root, converter=converter, **kwargs)
if isinstance(data1, tuple):
Expand Down Expand Up @@ -124,14 +127,16 @@ def check_etree_encode(self, root, converter=None, **kwargs):
# If the check fails, retry only if the converter is lossy (e.g. ParkerConverter),
# if the XML case has defaults taken from the schema, or if some part of data
# decoding is skipped by schema wildcards (set the specific argument in testfiles).
if converter not in (ParkerConverter, AbderaConverter, JsonMLConverter) and not skip_strict:
if skip_strict:
pass # can't ensure encode equivalence if the test case uses defaults
elif lossy:
pass # can't check encode equivalence if the converter is lossy
elif losslessly:
if debug_mode:
pdb.set_trace()
raise AssertionError(str(err) + msg_tmpl % "encoded tree differs from original")
elif converter is ParkerConverter and any(XSI_TYPE in e.attrib for e in root.iter()):
return # can't check encode equivalence if xsi:type is provided
else:
# Lossy or augmenting cases are checked after another decoding/encoding pass
# Lossy or augmenting cases are checked with another decoding/encoding pass
data2 = self.schema.decode(elem1, converter=converter, **kwargs)
if isinstance(data2, tuple):
data2 = data2[0]
Expand All @@ -157,6 +162,10 @@ def check_etree_encode(self, root, converter=None, **kwargs):
raise AssertionError(str(err) + msg_tmpl % "encoded tree differs after second pass")

def check_json_serialization(self, root, converter=None, **kwargs):
lossy = converter in (ParkerConverter, AbderaConverter)
unordered = converter not in (AbderaConverter, JsonMLConverter) or \
kwargs.get('unordered', False)

data1 = xmlschema.to_json(root, schema=self.schema, converter=converter, **kwargs)
if isinstance(data1, tuple):
data1 = data1[0]
Expand All @@ -169,17 +178,22 @@ def check_json_serialization(self, root, converter=None, **kwargs):
if isinstance(data2, tuple):
data2 = data2[0]

if converter is ParkerConverter and any(XSI_TYPE in e.attrib for e in root.iter()):
return # can't check encode equivalence if xsi:type is provided
elif sys.version_info >= (3, 6):
if data2 != data1 and (skip_strict or lossy or unordered):
# Can't ensure decode equivalence if the test case uses defaults,
# the converter is lossy, or the decoding is unordered.
return

if sys.version_info >= (3, 6):
if data1 != data2:
print(data1)
print(data2)
print(converter, unordered)
self.assertEqual(data2, data1, msg_tmpl % "serialized data changed at second pass")
else:
elem2 = xmlschema.from_json(data2, schema=self.schema, path=root.tag, converter=converter, **kwargs)
if isinstance(elem2, tuple):
elem2 = elem2[0]

unordered = converter is UnorderedConverter or kwargs.get('unordered')

try:
self.assertIsNone(etree_elements_assert_equal(
elem1, elem2, strict=False, skip_comments=True, unordered=unordered
Expand Down Expand Up @@ -263,6 +277,7 @@ def check_encoding_with_element_tree(self):
self.check_json_serialization(root, BadgerFishConverter, **options)
self.check_json_serialization(root, AbderaConverter, **options)
self.check_json_serialization(root, JsonMLConverter, **options)
self.check_json_serialization(root, UnorderedConverter, **options)

def check_decoding_and_encoding_with_lxml(self):
xml_tree = lxml_etree.parse(xml_file)
Expand Down Expand Up @@ -297,6 +312,7 @@ def check_decoding_and_encoding_with_lxml(self):
self.check_etree_encode(root, BadgerFishConverter, **options)
self.check_etree_encode(root, AbderaConverter, **options)
self.check_etree_encode(root, JsonMLConverter, **options)
self.check_etree_encode(root, UnorderedConverter, cdata_prefix='#', **options)

options.pop('dict_class')
self.check_json_serialization(root, cdata_prefix='#', **options)
Expand All @@ -306,6 +322,7 @@ def check_decoding_and_encoding_with_lxml(self):
self.check_json_serialization(root, BadgerFishConverter, **options)
self.check_json_serialization(root, AbderaConverter, **options)
self.check_json_serialization(root, JsonMLConverter, **options)
self.check_json_serialization(root, UnorderedConverter, **options)

def check_validate_and_is_valid_api(self):
if expected_errors:
Expand Down
1 change: 1 addition & 0 deletions xmlschema/tests/test_w3c_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@
'../msData/additional/test93490_8.xml', # 4799: Idem
'../msData/datatypes/gMonth002.xml', # 8017: gMonth bogus: conflicts with other invalid schema tests
'../msData/datatypes/gMonth004.xml', # 8019: (http://www.w3.org/Bugs/Public/show_bug.cgi?id=6901)
'../wgData/sg/e1.xml', # 14896: wrong href for valid instanceTest name="e1bis.xml"

# Valid XML tests
'../ibmData/instance_invalid/S3_4_2_4/s3_4_2_4ii03.xml', # defaultAttributeApply is true (false in comment)
Expand Down
20 changes: 13 additions & 7 deletions xmlschema/validators/complex_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,8 @@
XSD_SIMPLE_CONTENT, XSD_ANY_SIMPLE_TYPE, XSD_OPEN_CONTENT, XSD_ASSERT, \
get_qname, local_name
from ..helpers import get_xsd_derivation_attribute
from ..converters import ElementData

from .exceptions import XMLSchemaValidationError, XMLSchemaDecodeError
from .exceptions import XMLSchemaDecodeError
from .xsdbase import XsdComponent, XsdType, ValidationMixin
from .assertions import XsdAssert
from .attributes import XsdAttributeGroup
Expand Down Expand Up @@ -608,12 +607,19 @@ def iter_encode(self, obj, validation='lax', **kwargs):
xsd_element.type = self

if isinstance(value, list):
for item in value:
for result in xsd_element.iter_encode(item, validation, **kwargs):
try:
results = [x for item in value for x in xsd_element.iter_encode(
item, validation, **kwargs
)]
except XMLSchemaValueError:
pass
else:
for result in results:
yield result
else:
for result in xsd_element.iter_encode(value, validation, **kwargs):
yield result
return

for result in xsd_element.iter_encode(value, validation, **kwargs):
yield result


class Xsd11ComplexType(XsdComplexType):
Expand Down
4 changes: 4 additions & 0 deletions xmlschema/validators/elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,10 @@ def iter_decode(self, elem, validation='lax', **kwargs):
yield self.validation_error(validation, result, elem, **kwargs)
else:
content = result

if len(content) == 1 and content[0][0] == 1:
value, content = content[0][1], None

else:
if len(elem) and validation != 'skip':
reason = "a simple content element can't has child elements."
Expand Down

0 comments on commit 1b0db2e

Please sign in to comment.