Skip to content

Commit

Permalink
Merge branch 'develop' for updating to release v1.0.10
Browse files Browse the repository at this point in the history
  • Loading branch information
brunato committed Feb 25, 2019
2 parents 49f2fb1 + 4dc7714 commit 714b71d
Show file tree
Hide file tree
Showing 30 changed files with 948 additions and 669 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
CHANGELOG
*********

`v1.0.10`_ (2019-02-25)
=======================
* Fixed Element type mismatch issue when applying *SafeXMLParser* to schema resources
* More XSD 1.1 features implemented (open content and versioning namespace are missing)

`v1.0.9`_ (2019-02-03)
======================
* Programmatic import of ElementTree to avoid module mismatches
Expand Down Expand Up @@ -220,3 +225,4 @@ v0.9.6 (2017-05-05)
.. _v1.0.7: https://github.com/brunato/xmlschema/compare/v1.0.6...v1.0.7
.. _v1.0.8: https://github.com/brunato/xmlschema/compare/v1.0.7...v1.0.8
.. _v1.0.9: https://github.com/brunato/xmlschema/compare/v1.0.8...v1.0.9
.. _v1.0.10: https://github.com/brunato/xmlschema/compare/v1.0.9...v1.0.10
8 changes: 5 additions & 3 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -144,9 +144,6 @@ Resource access API
.. autoattribute:: url
.. autoattribute:: base_url
.. autoattribute:: namespace
.. autoattribute:: parse
.. autoattribute:: iterparse
.. autoattribute:: fromstring

.. automethod:: copy
.. automethod:: tostring
Expand All @@ -159,6 +156,11 @@ Resource access API
.. automethod:: get_namespaces
.. automethod:: get_locations

.. automethod:: defusing
.. automethod:: parse
.. automethod:: iterparse
.. automethod:: fromstring


.. autofunction:: xmlschema.fetch_resource
.. autofunction:: xmlschema.fetch_schema
Expand Down
2 changes: 1 addition & 1 deletion doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
# The short X.Y version.
version = '1.0'
# The full version, including alpha/beta/rc tags.
release = '1.0.9'
release = '1.0.10'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
5 changes: 0 additions & 5 deletions doc/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -522,14 +522,9 @@ XML entity-based attacks protection

The XML data resource loading is protected using the `SafeXMLParser` class, a subclass of
the pure Python version of XMLParser that forbids the use of entities.

The protection is applied both to XSD schemas and to XML data. The usage of this feature is
regulated by the XMLSchema's argument *defuse*.
By default this argument has the value *'remote'*, which means that the protection on XML data
is applied only to data loaded from remote locations. Other values for this argument can be
*'always'* and *'never'*.

The `SafeXMLParser` requires the usage of the pure Python module of ElementTree, and this
carries the penalty that trees loaded by this parser can't be serialized with pickle,
which in Python 3 works with the C implementation of ElementTree.

2 changes: 1 addition & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Requirements for setting up a development environment for the xmlschema package.
setuptools
tox
elementpath>=1.1.2
elementpath~=1.1.5
lxml
memory_profiler
pathlib2 # For Py27 tests on resources
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@

setup(
name='xmlschema',
version='1.0.9',
install_requires=['elementpath>=1.1.2'],
version='1.0.10',
install_requires=['elementpath~=1.1.5'],
packages=['xmlschema'],
include_package_data=True,
author='Davide Brunato',
Expand Down
4 changes: 2 additions & 2 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@ toxworkdir = {homedir}/.tox/xmlschema
[testenv]
deps =
lxml
elementpath>=1.1.2
elementpath~=1.1.5
commands = python xmlschema/tests/test_all.py {posargs}

[testenv:py27]
deps =
lxml
elementpath>=1.1.2
elementpath~=1.1.5
pathlib2
commands = python xmlschema/tests/test_all.py {posargs}
2 changes: 1 addition & 1 deletion xmlschema/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
XMLSchemaImportWarning, XsdGlobals, XMLSchemaBase, XMLSchema, XMLSchema10
)

__version__ = '1.0.9'
__version__ = '1.0.10'
__author__ = "Davide Brunato"
__contact__ = "brunato@sissa.it"
__copyright__ = "Copyright 2016-2019, SISSA"
Expand Down
3 changes: 2 additions & 1 deletion xmlschema/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,15 @@
from urllib.request import urlopen, urljoin, urlsplit, pathname2url
from urllib.parse import uses_relative, urlparse, urlunsplit
from urllib.error import URLError
from io import StringIO
from io import StringIO, BytesIO
from collections.abc import Iterable, MutableSet, Sequence, MutableSequence, Mapping, MutableMapping
except ImportError:
# Python 2.7 imports
from urllib import pathname2url
from urllib2 import urlopen, URLError
from urlparse import urlsplit, urljoin, uses_relative, urlparse, urlunsplit
from StringIO import StringIO # the io.StringIO accepts only unicode type
from io import BytesIO
from collections import Iterable, MutableSet, Sequence, MutableSequence, Mapping, MutableMapping


Expand Down
24 changes: 0 additions & 24 deletions xmlschema/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,27 +229,3 @@ def get_xsd_derivation_attribute(elem, attribute, values):
elif not all([s in values for s in items]):
raise XMLSchemaValueError("wrong value %r for attribute %r." % (value, attribute))
return value


def get_xpath_default_namespace(elem, default_namespace, target_namespace, default=None):
    """
    Resolve the ``xpathDefaultNamespace`` attribute of an XSD 1.1 declaration
    (alternative, assert, assertion, selector or field) to a namespace value.

    :param elem: the declaration element; only its ``get()`` method is used.
    :param default_namespace: value returned for the ``##defaultNamespace`` keyword.
    :param target_namespace: value returned for the ``##targetNamespace`` keyword.
    :param default: value returned when the attribute is not present.
    :returns: `default` if the attribute is missing, an empty string for ``##local``, \
    the matching argument for the other two keywords, otherwise the stripped attribute \
    value when it consists of a single token.
    :raises XMLSchemaValueError: if the stripped value is empty or contains more \
    than one whitespace-separated token.
    """
    raw = elem.get('xpathDefaultNamespace')
    if raw is None:
        return default

    token = raw.strip()

    # The three reserved keywords map directly onto a known namespace.
    keywords = {
        '##local': '',
        '##defaultNamespace': default_namespace,
        '##targetNamespace': target_namespace,
    }
    if token in keywords:
        return keywords[token]

    # Anything else is accepted as a URI only when it is exactly one token.
    if len(token.split()) == 1:
        return token

    admitted_values = ('##defaultNamespace', '##targetNamespace', '##local')
    msg = "wrong value %r for 'xpathDefaultNamespace' attribute, can be (anyURI | %s)."
    raise XMLSchemaValueError(msg % (token, ' | '.join(admitted_values)))
1 change: 1 addition & 0 deletions xmlschema/qnames.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def xsi_qname(name):
XSD_INCLUDE = xsd_qname('include')
XSD_IMPORT = xsd_qname('import')
XSD_REDEFINE = xsd_qname('redefine')
XSD_OVERRIDE = xsd_qname('override')

# Structures
XSD_SIMPLE_TYPE = xsd_qname('simpleType')
Expand Down
73 changes: 51 additions & 22 deletions xmlschema/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import codecs

from .compat import (
PY3, StringIO, string_base_type, urlopen, urlsplit, urljoin, urlunsplit,
PY3, StringIO, BytesIO, string_base_type, urlopen, urlsplit, urljoin, urlunsplit,
pathname2url, URLError, uses_relative
)
from .exceptions import XMLSchemaTypeError, XMLSchemaValueError, XMLSchemaURLError, XMLSchemaOSError
Expand Down Expand Up @@ -220,8 +220,8 @@ class XMLResource(object):
object or an ElementTree or an Element.
:param base_url: is an optional base URL, used for the normalization of relative paths when \
the URL of the resource can't be obtained from the source argument.
:param defuse: set the usage of defusedxml library for parsing XML data. Can be 'always', \
'remote' or 'never'. Default is 'remote' that uses the defusedxml only when loading remote data.
:param defuse: set the usage of SafeXMLParser for XML data. Can be 'always', 'remote' or 'never'. \
Default is 'remote' that uses the defusedxml only when loading remote data.
:param timeout: the timeout in seconds for the connection attempt in case of remote data.
:param lazy: if set to `False` the source is fully loaded into and processed from memory. Default is `True`.
"""
Expand Down Expand Up @@ -270,7 +270,7 @@ def __setattr__(self, name, value):
def _fromsource(self, source):
url, lazy = None, self._lazy
if is_etree_element(source):
return source, None, None, None
return source, None, None, None # Source is already an Element --> nothing to load
elif isinstance(source, string_base_type):
_url, self._url = self._url, None
try:
Expand All @@ -280,7 +280,7 @@ def _fromsource(self, source):
return root, None, source, None
else:
return self.fromstring(source), None, source, None
except (ElementTree.ParseError, UnicodeEncodeError):
except (ElementTree.ParseError, PyElementTree.ParseError, UnicodeEncodeError):
if '\n' in source:
raise
finally:
Expand Down Expand Up @@ -383,19 +383,48 @@ def namespace(self):
"""The namespace of the XML document."""
return get_namespace(self._root.tag) if self._root is not None else None

@staticmethod
def defusing(source):
    """
    Run a protective pre-parse of an XML source with `SafeXMLParser`.

    Parsing stops as soon as the first 'start' event is produced, so only the
    part of the document up to the first element start is consumed. Any
    pure-Python `ParseError` raised during that step is re-raised as an
    `ElementTree.ParseError` carrying the same message. (Per the original
    description, `SafeXMLParser` is expected to fail on entity definitions
    or remote entity loading.)

    :param source: a filename or file object containing XML data.
    """
    safe_parser = SafeXMLParser(target=PyElementTree.TreeBuilder())
    try:
        events = PyElementTree.iterparse(source, ('start',), safe_parser)
        # Pull a single event at most: the prologue has been scanned by then.
        next(iter(events), None)
    except PyElementTree.ParseError as err:
        raise ElementTree.ParseError(str(err))

def parse(self, source):
"""The ElementTree parse method, depends from 'defuse' and 'url' attributes."""
"""
An equivalent of *ElementTree.parse()* that can protect from XML entities attacks. When
protection is applied XML data are loaded and defused before building the ElementTree instance.
:param source: a filename or file object containing XML data.
:returns: an ElementTree instance.
"""
if self.defuse == 'always' or self.defuse == 'remote' and is_remote_url(self._url):
parser = SafeXMLParser(target=PyElementTree.TreeBuilder())
try:
return PyElementTree.parse(source, parser)
except PyElementTree.ParseError as err:
raise ElementTree.ParseError(str(err))
text = source.read()
if isinstance(text, bytes):
self.defusing(BytesIO(text))
return ElementTree.parse(BytesIO(text))
else:
self.defusing(StringIO(text))
return ElementTree.parse(StringIO(text))
else:
return ElementTree.parse(source)

def iterparse(self, source, events=None):
"""The ElementTree iterparse method, depends from 'defuse' and 'url' attributes."""
"""
An equivalent of *ElementTree.iterparse()* that can protect from XML entities attacks.
When protection is applied the iterator yields pure-Python Element instances.
:param source: a filename or file object containing XML data.
:param events: a list of events to report back. If omitted, only “end” events are reported.
"""
if self.defuse == 'always' or self.defuse == 'remote' and is_remote_url(self._url):
parser = SafeXMLParser(target=PyElementTree.TreeBuilder())
try:
Expand All @@ -406,15 +435,15 @@ def iterparse(self, source, events=None):
return ElementTree.iterparse(source, events)

def fromstring(self, text):
"""The ElementTree fromstring method, depends from 'defuse' and 'url' attributes."""
"""
An equivalent of *ElementTree.fromstring()* that can protect from XML entities attacks.
:param text: a string containing XML data.
:returns: the root Element instance.
"""
if self.defuse == 'always' or self.defuse == 'remote' and is_remote_url(self._url):
parser = SafeXMLParser(target=PyElementTree.TreeBuilder())
try:
return PyElementTree.fromstring(text, parser)
except PyElementTree.ParseError as err:
raise ElementTree.ParseError(str(err))
else:
return ElementTree.fromstring(text)
self.defusing(StringIO(text))
return ElementTree.fromstring(text)

def tostring(self, indent='', max_lines=None, spaces_for_tab=4, xml_declaration=False):
"""Generates a string representation of the XML resource."""
Expand Down Expand Up @@ -550,15 +579,15 @@ def update_nsmap(prefix, uri):
try:
for event, node in self.iterparse(resource, events=('start-ns',)):
update_nsmap(*node)
except ElementTree.ParseError:
except (ElementTree.ParseError, PyElementTree.ParseError, UnicodeEncodeError):
pass
finally:
resource.close()
elif isinstance(self._text, string_base_type):
try:
for event, node in self.iterparse(StringIO(self._text), events=('start-ns',)):
update_nsmap(*node)
except ElementTree.ParseError:
except (ElementTree.ParseError, PyElementTree.ParseError, UnicodeEncodeError):
pass
else:
# Warning: can extracts namespace information only from lxml etree structures
Expand Down
41 changes: 34 additions & 7 deletions xmlschema/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

import xmlschema
from xmlschema import XMLSchema
from xmlschema.compat import urlopen, URLError
from xmlschema.compat import urlopen, URLError, unicode_type
from xmlschema.exceptions import XMLSchemaValueError
from xmlschema.etree import (
is_etree_element, etree_element, etree_register_namespace, etree_elements_assert_equal
Expand Down Expand Up @@ -60,9 +60,7 @@ class XMLSchemaTestCase(unittest.TestCase):
Setup tests common environment. The tests parts have to use empty prefix for
XSD namespace names and 'ns' prefix for XMLSchema test namespace names.
"""

test_dir = os.path.dirname(__file__)
test_cases_dir = os.path.join(test_dir, 'test_cases/')
test_cases_dir = os.path.join(os.path.dirname(__file__), 'test_cases/')
etree_register_namespace(prefix='', uri=XSD_NAMESPACE)
etree_register_namespace(prefix='ns', uri="ns")
SCHEMA_TEMPLATE = """<?xml version="1.0" encoding="UTF-8"?>
Expand All @@ -75,6 +73,7 @@ class XMLSchemaTestCase(unittest.TestCase):

@classmethod
def setUpClass(cls):
cls.errors = []
cls.xsd_types = cls.schema_class.builtin_types()
cls.content_pattern = re.compile(r'(xs:sequence|xs:choice|xs:all)')

Expand Down Expand Up @@ -103,9 +102,9 @@ def setUpClass(cls):
@classmethod
def casepath(cls, path):
"""
Returns the absolute path for a test case file.
Returns the absolute path of a test case file.
:param path: the relative path of the case file from base dir ``test_cases/``.
:param path: the relative path of the case file from base dir ``xmlschema/tests/test_cases/``.
"""
return os.path.join(cls.test_cases_dir, path)

Expand Down Expand Up @@ -137,7 +136,7 @@ def retrieve_schema_source(self, source):
else:
source = source.strip()
if not source.startswith('<'):
return os.path.join(self.test_dir, source)
return self.casepath(source)
else:
return self.SCHEMA_TEMPLATE.format(self.schema_class.XSD_VERSION, source)

Expand All @@ -164,3 +163,31 @@ def check_namespace_prefixes(self, s):
if match:
msg = "Protected prefix {!r} found:\n {}".format(match.group(0), s)
self.assertIsNone(match, msg)

def check_errors(self, path, expected):
    """
    Verify the errors collected in ``self.errors``: each one must carry a path
    and a namespaces map and must pass the namespace-prefix check on its text,
    and their number must match the expected count.

    :param path: the path of the test case, used only in the failure message.
    :param expected: the number of expected errors.
    :raises ValueError: if no error was collected while some were expected, or \
    if the number of collected errors differs from `expected`. The message \
    reports at most the first five errors.
    """
    for err in self.errors:
        text = unicode_type(err)
        self.assertTrue(err.path, "Missing path for: %s" % text)
        self.assertTrue(err.namespaces, "Missing namespaces for: %s" % text)
        self.check_namespace_prefixes(text)

    if not self.errors and expected:
        raise ValueError("found no errors when %d expected." % expected)

    found = len(self.errors)
    if found == expected:
        return

    # Pick the message template according to how many errors will be listed.
    if found == 1:
        template = "{!r}: n.{} errors expected, found {}:\n\n{}"
    elif found <= 5:
        template = "{!r}: n.{} errors expected, found {}. Errors follow:\n\n{}"
    else:
        template = "{!r}: n.{} errors expected, found {}. First five errors follow:\n\n{}"

    listing = '\n++++++++++\n\n'.join(unicode_type(e) for e in self.errors[:5])
    raise ValueError(template.format(path, expected, found, listing))
Loading

0 comments on commit 714b71d

Please sign in to comment.