From 73ade7e4330d81b73edf9052cef2e29496aec702 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Fri, 24 Jun 2022 17:05:25 +0200 Subject: [PATCH 01/17] Bump major release - New major release (v2.0.0) will require elementpath>=3.0 --- doc/conf.py | 4 ++-- publiccode.yml | 4 ++-- requirements-dev.txt | 2 +- setup.py | 10 +++++----- tox.ini | 29 +++++++---------------------- xmlschema/__init__.py | 2 +- 6 files changed, 18 insertions(+), 33 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 8014b565..93d860b8 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -78,9 +78,9 @@ # built documents. # # The short X.Y version. -version = '1.11' +version = '2.0' # The full version, including alpha/beta/rc tags. -release = '1.11.3' +release = '2.0.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/publiccode.yml b/publiccode.yml index 5d7db2ee..9586f1ec 100644 --- a/publiccode.yml +++ b/publiccode.yml @@ -6,8 +6,8 @@ publiccodeYmlVersion: '0.2' name: xmlschema url: 'https://github.com/sissaschool/xmlschema' landingURL: 'https://github.com/sissaschool/xmlschema' -releaseDate: '2022-06-24' -softwareVersion: v1.11.3 +releaseDate: '2022-XX-XX' +softwareVersion: v2.0.0 developmentStatus: stable platforms: - linux diff --git a/requirements-dev.txt b/requirements-dev.txt index 640d9817..792729f1 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -2,7 +2,7 @@ setuptools tox coverage -elementpath>=2.5.0, <3.0.0 +-e ../elementpath # elementpath>=3.0.0, <4.0.0 lxml jinja2 memory_profiler diff --git a/setup.py b/setup.py index befebc07..d5e6017f 100755 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ setup( name='xmlschema', - version='1.11.3', + version='2.0.0', packages=find_packages(include=['xmlschema', 'xmlschema.*']), include_package_data=True, entry_points={ @@ -29,13 +29,13 @@ ] }, python_requires='>=3.7', - install_requires=['elementpath>=2.5.0, <3.0.0'], + 
install_requires=['elementpath>=3.0.0, <4.0.0'], extras_require={ - 'codegen': ['elementpath>=2.5.0, <3.0.0', 'jinja2'], - 'dev': ['tox', 'coverage', 'lxml', 'elementpath>=2.5.0, <3.0.0', + 'codegen': ['elementpath>=3.0.0, <4.0.0', 'jinja2'], + 'dev': ['tox', 'coverage', 'lxml', 'elementpath>=3.0.0, <4.0.0', 'memory_profiler', 'Sphinx', 'sphinx_rtd_theme', 'jinja2', 'flake8', 'mypy', 'lxml-stubs'], - 'docs': ['elementpath>=2.5.0, <3.0.0', 'Sphinx', 'sphinx_rtd_theme', 'jinja2'] + 'docs': ['elementpath>=3.0.0, <4.0.0', 'Sphinx', 'sphinx_rtd_theme', 'jinja2'] }, author='Davide Brunato', author_email='brunato@sissa.it', diff --git a/tox.ini b/tox.ini index 5899188a..29e90694 100644 --- a/tox.ini +++ b/tox.ini @@ -1,12 +1,12 @@ [tox] -envlist = py{37,38,39,310}, pypy3, ep{250,251,252,253}, docs, +envlist = py{37,38,39,310}, pypy3, ep{300}, docs, flake8, mypy-py{37,38,39,310}, coverage, pytest skip_missing_interpreters = true toxworkdir = {homedir}/.tox/xmlschema [testenv] deps = - elementpath>=2.5.0, <3.0.0 + elementpath>=3.0.0, <4.0.0 lxml jinja2 py{39,310}: memory_profiler @@ -24,24 +24,9 @@ whitelist_externals = make [testenv:pypy3] commands = python -m unittest -[testenv:ep250] +[testenv:ep300] deps = - elementpath==2.5.0 - lxml - -[testenv:ep251] -deps = - elementpath==2.5.1 - lxml - -[testenv:ep252] -deps = - elementpath==2.5.2 - lxml - -[testenv:ep253] -deps = - elementpath==2.5.3 + elementpath==3.0.0 lxml [testenv:docs] @@ -61,7 +46,7 @@ commands = [testenv:mypy-py37] deps = mypy==0.961 - elementpath==2.5.3 + elementpath==3.0.0 lxml-stubs jinja2 commands = @@ -70,7 +55,7 @@ commands = [testenv:mypy-py{38,39,310}] deps = mypy==0.961 - elementpath==2.5.3 + elementpath==3.0.0 lxml-stubs jinja2 commands = @@ -90,7 +75,7 @@ commands = deps = pytest pytest-randomly - elementpath~=2.5.0 + elementpath~=3.0.0 lxml jinja2 mypy==0.961 diff --git a/xmlschema/__init__.py b/xmlschema/__init__.py index 639fd58d..8545f81d 100644 --- a/xmlschema/__init__.py +++ 
b/xmlschema/__init__.py @@ -30,7 +30,7 @@ XsdComponent, XsdType, XsdElement, XsdAttribute ) -__version__ = '1.11.3' +__version__ = '2.0.0' __author__ = "Davide Brunato" __contact__ = "brunato@sissa.it" __copyright__ = "Copyright 2016-2022, SISSA" From 52c31478dc2dcd0e2fdbaaa45d17de7913d36906 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Sat, 25 Jun 2022 15:31:37 +0200 Subject: [PATCH 02/17] Adapting XPath related parts to new XPath nodes - Fix mismatches in tests --- tests/test_xpath.py | 6 ++-- xmlschema/testing/_builders.py | 11 ++++--- xmlschema/validators/elements.py | 9 +++-- xmlschema/validators/identities.py | 20 ++++------- xmlschema/xpath.py | 53 ++---------------------------- 5 files changed, 23 insertions(+), 76 deletions(-) diff --git a/tests/test_xpath.py b/tests/test_xpath.py index 66890816..8b2a9fea 100644 --- a/tests/test_xpath.py +++ b/tests/test_xpath.py @@ -19,7 +19,7 @@ from xmlschema import XMLSchema10, XMLSchema11, XsdElement, XsdAttribute from xmlschema.names import XSD_NAMESPACE from xmlschema.etree import ElementTree -from xmlschema.xpath import XMLSchemaProxy, iter_schema_nodes, XPathElement +from xmlschema.xpath import XMLSchemaProxy, XPathElement from xmlschema.validators import XsdAtomic, XsdAtomicRestriction CASES_DIR = os.path.join(os.path.dirname(__file__), 'test_cases/') @@ -61,7 +61,7 @@ def test_bind_parser_method(self): def test_get_context_method(self): schema_proxy = XMLSchemaProxy(self.xs1) context = schema_proxy.get_context() - self.assertIs(context.root, self.xs1) + self.assertIs(context.root.value, self.xs1) def test_get_type_method(self): schema_proxy = XMLSchemaProxy(self.xs1) @@ -128,6 +128,7 @@ def test_get_primitive_type_method(self): xsd_type = self.xs3.types['integer_or_float'] self.assertIs(schema_proxy.get_primitive_type(xsd_type), xsd_type) + @unittest.skip def test_iter_schema_nodes_function(self): vh_elements = set(e for e in self.xs1.maps.iter_components(XsdElement) if e.target_namespace == 
self.xs1.target_namespace) @@ -273,6 +274,7 @@ def test_xpath_group(self): def test_xpath_predicate(self): car = self.xs1.elements['cars'].type.content[0] + self.assertListEqual(self.xs1.findall("./vh:vehicles/vh:cars/vh:car[@make]"), [car]) self.assertListEqual(self.xs1.findall("./vh:vehicles/vh:cars/vh:car[@make]"), [car]) self.assertListEqual(self.xs1.findall("./vh:vehicles/vh:cars['ciao']"), [self.cars]) diff --git a/xmlschema/testing/_builders.py b/xmlschema/testing/_builders.py index d1efe918..8e54aae4 100644 --- a/xmlschema/testing/_builders.py +++ b/xmlschema/testing/_builders.py @@ -124,13 +124,14 @@ def check_xsd_file(self): # XPath API tests if not inspect and not self.errors: context = XMLSchemaContext(schema) - elements = [x for x in schema.iter()] # Contains schema elements only - xpath_context_elements = [x for x in context.iter() if isinstance(x, XsdValidator)] + element_nodes = [x for x in context.root.iter() if hasattr(x, 'elem')] descendants = [x for x in context.iter_descendants('descendant-or-self')] - self.assertTrue(x in descendants for x in xpath_context_elements) - for e in elements: + self.assertTrue(x in descendants for x in element_nodes) + + context_xsd_elements = [e.value for e in element_nodes] + for xsd_element in schema.iter(): # Context elements can include elements of other schemas (by element ref) - self.assertIn(e, xpath_context_elements, msg=xsd_file) + self.assertIn(xsd_element, context_xsd_elements, msg=xsd_file) # Checks on XSD types for xsd_type in schema.maps.iter_components(xsd_classes=XsdType): diff --git a/xmlschema/validators/elements.py b/xmlschema/validators/elements.py index 9010b812..479645fc 100644 --- a/xmlschema/validators/elements.py +++ b/xmlschema/validators/elements.py @@ -38,7 +38,7 @@ from .xsdbase import XSD_TYPE_DERIVATIONS, XSD_ELEMENT_DERIVATIONS, \ XsdComponent, ValidationMixin from .particles import ParticleMixin, OccursCalculator -from .identities import IdentityXPathContext, XsdIdentity, XsdKey, 
XsdUnique, \ +from .identities import XsdIdentity, XsdKey, XsdUnique, \ XsdKeyref, IdentityCounter, IdentityCounterType from .simple_types import XsdSimpleType from .attributes import XsdAttribute @@ -659,9 +659,8 @@ def iter_decode(self, obj: ElementType, validation: str = 'lax', **kwargs: Any) if isinstance(identity.elements, tuple): continue # Skip unbuilt identities - context = IdentityXPathContext( - self.schema, item=xpath_element # type: ignore[arg-type] - ) + context = XPathContext(self.schema, item=xpath_element) + for e in identity.selector.token.select_results(context): if not isinstance(e, XsdElement): reason = _("selector xpath expression can only select elements") @@ -828,7 +827,7 @@ def iter_decode(self, obj: ElementType, validation: str = 'lax', **kwargs: Any) if content is not None: del content - # Collects fields values for identities that refer to this element. + # Collect field values for identities that refer to this element. for identity, counter in identities.items(): if not counter.enabled or not identity.elements: continue diff --git a/xmlschema/validators/identities.py b/xmlschema/validators/identities.py index f52e4839..c4edb62f 100644 --- a/xmlschema/validators/identities.py +++ b/xmlschema/validators/identities.py @@ -22,7 +22,6 @@ from ..translation import gettext as _ from ..helpers import get_qname, get_extended_qname from ..aliases import ElementType, SchemaType, NamespacesType, AtomicValueType -from ..xpath import iter_schema_nodes from .xsdbase import XsdComponent from .attributes import XsdAttribute @@ -45,12 +44,9 @@ )) -# XSD identities use a restricted parser and a context for iterate element -# references. The XMLSchemaProxy is not used for the specific selection of -# fields and elements and the XSD fields are got at first validation run. -class IdentityXPathContext(XPathContext): - _iter_nodes = staticmethod(iter_schema_nodes) - +# XSD identities use a restricted XPath 2.0 parser. 
The XMLSchemaProxy is +# not used for the specific selection of fields and elements and the XSD +# fields are collected at first validation run. class IdentityXPathParser(XPath2Parser): symbol_table = { @@ -210,7 +206,7 @@ def build(self) -> None: self.fields = ref.fields self.ref = ref - context = IdentityXPathContext(self.schema, item=self.parent) # type: ignore + context = XPathContext(self.schema, item=self.parent) # type: ignore self.elements = {} try: @@ -257,15 +253,11 @@ def get_fields(self, elem: Union[ElementType, 'XsdElement'], """ fields: List[IdentityFieldItemType] = [] - if not isinstance(elem, XsdComponent): - context_class = XPathContext - else: - context_class = IdentityXPathContext - result: Any value: Union[AtomicValueType, None] for k, field in enumerate(self.fields): - result = field.token.get_results(context_class(elem)) # type: ignore + context = XPathContext(elem) + result = field.token.get_results(context) if not result: if decoders is not None and decoders[k] is not None: diff --git a/xmlschema/xpath.py b/xmlschema/xpath.py index 38fb3307..62b3b98e 100644 --- a/xmlschema/xpath.py +++ b/xmlschema/xpath.py @@ -16,8 +16,8 @@ Sequence, Set, TypeVar, Union import re -from elementpath import TypedElement, XPath2Parser, \ - XPathSchemaContext, AbstractSchemaProxy, protocols +from elementpath import XPath2Parser, XPathSchemaContext, \ + AbstractSchemaProxy, protocols from .exceptions import XMLSchemaValueError, XMLSchemaTypeError from .names import XSD_NAMESPACE @@ -45,55 +45,8 @@ class ElementProtocol(protocols.ElementProtocol, Protocol): _REGEX_TAG_POSITION = re.compile(r'\b\[\d+]') -def iter_schema_nodes(root: Union[XMLSchemaProtocol, ElementProtocol], with_root: bool = True) \ - -> Iterator[Union[XMLSchemaProtocol, ElementProtocol]]: - """ - Iteration function for schema nodes. It doesn't yield text nodes, - that are always `None` for schema elements, and detects visited - element in order to skip already visited nodes. 
- - :param root: schema or schema element. - :param with_root: if `True` yields initial element. - """ - if isinstance(root, TypedElement): - root = cast(ElementProtocol, root.elem) - - nodes = {root} - if with_root: - yield root - - iterators: List[Any] = [] - children: Iterator[Any] = iter(root) - - while True: - try: - child = next(children) - except StopIteration: - try: - children = iterators.pop() - except IndexError: - return - else: - if child in nodes: - continue - elif child.ref is not None: - nodes.add(child) - yield child - if child.ref not in nodes: - nodes.add(child.ref) - yield child.ref - iterators.append(children) - children = iter(child.ref) - else: - nodes.add(child) - yield child - iterators.append(children) - children = iter(child) - - class XMLSchemaContext(XPathSchemaContext): """XPath dynamic schema context for the *xmlschema* library.""" - _iter_nodes = staticmethod(iter_schema_nodes) class XMLSchemaProxy(AbstractSchemaProxy): @@ -293,7 +246,7 @@ def iterfind(self, path: str, namespaces: Optional[NamespacesType] = None) -> It :param namespaces: is an optional mapping from namespace prefix to full name. :return: an iterable yielding all matching XSD subelements in document order. 
""" - path = _REGEX_TAG_POSITION.sub('', path.strip()) # Strips tags positions from path + path = _REGEX_TAG_POSITION.sub('', path.strip()) # Strip tags positions from path namespaces = self._get_xpath_namespaces(namespaces) parser = XPath2Parser(namespaces, strict=False) context = XMLSchemaContext(self) # type: ignore[arg-type] From bb8a619cb80878deb09633f59b27631da6f1d28b Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Fri, 1 Jul 2022 17:01:13 +0200 Subject: [PATCH 03/17] Add value_constraint property to wildcards for compatibility --- xmlschema/validators/wildcards.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/xmlschema/validators/wildcards.py b/xmlschema/validators/wildcards.py index 039eb374..963ea502 100644 --- a/xmlschema/validators/wildcards.py +++ b/xmlschema/validators/wildcards.py @@ -150,6 +150,10 @@ def _parse_not_constraints(self) -> None: def built(self) -> bool: return True + @property + def value_constraint(self) -> Optional[str]: + return None + def is_matching(self, name: Optional[str], default_namespace: Optional[str] = None, **kwargs: Any) -> bool: From ba8d49b2f487f6210d631adb52bfe0b04fc5fc5b Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Sat, 2 Jul 2022 17:23:40 +0200 Subject: [PATCH 04/17] Remove XMLSchemaContext (use base XPathSchemaContext now) --- xmlschema/testing/_builders.py | 7 ++++--- xmlschema/xpath.py | 14 +++++--------- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/xmlschema/testing/_builders.py b/xmlschema/testing/_builders.py index 8e54aae4..f591c792 100644 --- a/xmlschema/testing/_builders.py +++ b/xmlschema/testing/_builders.py @@ -26,6 +26,8 @@ else: lxml_etree_element = lxml_etree.Element +from elementpath import XPathSchemaContext + import xmlschema from xmlschema import XMLSchemaBase, XMLSchema11, XMLSchemaValidationError, \ XMLSchemaParseError, UnorderedConverter, ParkerConverter, BadgerFishConverter, \ @@ -35,8 +37,7 @@ from xmlschema.etree import etree_tostring, ElementTree, \ 
py_etree_element from xmlschema.resources import fetch_namespaces -from xmlschema.xpath import XMLSchemaContext -from xmlschema.validators import XsdValidator, XsdType, Xsd11ComplexType +from xmlschema.validators import XsdType, Xsd11ComplexType from xmlschema.dataobjects import DataElementConverter, DataBindingConverter, DataElement try: @@ -123,7 +124,7 @@ def check_xsd_file(self): # XPath API tests if not inspect and not self.errors: - context = XMLSchemaContext(schema) + context = XPathSchemaContext(schema) element_nodes = [x for x in context.root.iter() if hasattr(x, 'elem')] descendants = [x for x in context.iter_descendants('descendant-or-self')] self.assertTrue(x in descendants for x in element_nodes) diff --git a/xmlschema/xpath.py b/xmlschema/xpath.py index 62b3b98e..41e19c36 100644 --- a/xmlschema/xpath.py +++ b/xmlschema/xpath.py @@ -45,10 +45,6 @@ class ElementProtocol(protocols.ElementProtocol, Protocol): _REGEX_TAG_POSITION = re.compile(r'\b\[\d+]') -class XMLSchemaContext(XPathSchemaContext): - """XPath dynamic schema context for the *xmlschema* library.""" - - class XMLSchemaProxy(AbstractSchemaProxy): """XPath schema proxy for the *xmlschema* library.""" _schema: SchemaType # type: ignore[assignment] @@ -83,8 +79,8 @@ def bind_parser(self, parser: XPath2Parser) -> None: parser.symbol_table.update(self._schema.xpath_tokens) - def get_context(self) -> XMLSchemaContext: - return XMLSchemaContext( + def get_context(self) -> XPathSchemaContext: + return XPathSchemaContext( root=self._schema, # type: ignore[arg-type] namespaces=dict(self._schema.namespaces), item=self._base_element @@ -218,7 +214,7 @@ def find(self, path: str, namespaces: Optional[NamespacesType] = None) -> Option path = _REGEX_TAG_POSITION.sub('', path.strip()) # Strips tags positions from path namespaces = self._get_xpath_namespaces(namespaces) parser = XPath2Parser(namespaces, strict=False) - context = XMLSchemaContext(self) # type: ignore[arg-type] + context = 
XPathSchemaContext(self) # type: ignore[arg-type] return cast(Optional[E], next(parser.parse(path).select_results(context), None)) @@ -234,7 +230,7 @@ def findall(self, path: str, namespaces: Optional[NamespacesType] = None) -> Lis path = _REGEX_TAG_POSITION.sub('', path.strip()) # Strips tags positions from path namespaces = self._get_xpath_namespaces(namespaces) parser = XPath2Parser(namespaces, strict=False) - context = XMLSchemaContext(self) # type: ignore[arg-type] + context = XPathSchemaContext(self) # type: ignore[arg-type] return cast(List[E], parser.parse(path).get_results(context)) @@ -249,7 +245,7 @@ def iterfind(self, path: str, namespaces: Optional[NamespacesType] = None) -> It path = _REGEX_TAG_POSITION.sub('', path.strip()) # Strip tags positions from path namespaces = self._get_xpath_namespaces(namespaces) parser = XPath2Parser(namespaces, strict=False) - context = XMLSchemaContext(self) # type: ignore[arg-type] + context = XPathSchemaContext(self) # type: ignore[arg-type] return cast(Iterator[E], parser.parse(path).select_results(context)) From c121b91deac010815e434068961f6c5de94b99d9 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Tue, 5 Jul 2022 17:57:39 +0200 Subject: [PATCH 05/17] Clean DataElement with a proxy method for getting the XPathContext - A subclass can redefine this method for caching nodes if needed --- xmlschema/dataobjects.py | 45 ++++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/xmlschema/dataobjects.py b/xmlschema/dataobjects.py index d47c8fde..ce8ac12b 100644 --- a/xmlschema/dataobjects.py +++ b/xmlschema/dataobjects.py @@ -11,7 +11,7 @@ from itertools import count from typing import TYPE_CHECKING, cast, overload, Any, Dict, List, Iterator, \ Optional, Union, Tuple, Type, MutableMapping, MutableSequence -from elementpath import XPathContext, XPath2Parser +from elementpath import XPathContext, XPath2Parser, build_node_tree, protocols from .exceptions import 
XMLSchemaAttributeError, XMLSchemaTypeError, XMLSchemaValueError from .etree import ElementData, etree_tostring @@ -259,6 +259,10 @@ def tostring(self, indent: str = '', max_lines: Optional[int] = None, root, errors = self.encode(validation='lax') return etree_tostring(root, self.nsmap, indent, max_lines, spaces_for_tab) + def _get_xpath_context(self) -> XPathContext: + xpath_root = build_node_tree(cast(protocols.ElementProtocol, self)) + return XPathContext(xpath_root) + def find(self, path: str, namespaces: Optional[NamespacesType] = None) -> Optional['DataElement']: """ @@ -269,7 +273,7 @@ def find(self, path: str, :return: the first matching data element or ``None`` if there is no match. """ parser = XPath2Parser(namespaces, strict=False) - context = XPathContext(cast(Any, self)) + context = self._get_xpath_context() result = next(parser.parse(path).select_results(context), None) return result if isinstance(result, DataElement) else None @@ -284,11 +288,11 @@ def findall(self, path: str, an empty list is returned if there is no match. """ parser = XPath2Parser(namespaces, strict=False) - context = XPathContext(cast(Any, self)) + context = self._get_xpath_context() results = parser.parse(path).get_results(context) if not isinstance(results, list): return [] - return [e for e in results if isinstance(e, DataElement)] + return cast(List[DataElement], [e for e in results if isinstance(e, DataElement)]) def iterfind(self, path: str, namespaces: Optional[NamespacesType] = None) -> Iterator['DataElement']: @@ -300,7 +304,7 @@ def iterfind(self, path: str, :return: an iterable yielding all matching data elements in document order. 
""" parser = XPath2Parser(namespaces, strict=False) - context = XPathContext(cast(Any, self)) + context = self._get_xpath_context() results = parser.parse(path).select_results(context) yield from filter(lambda x: isinstance(x, DataElement), results) @@ -396,16 +400,20 @@ def copy(self, **kwargs: Any) -> 'DataElementConverter': obj.data_element_class = kwargs.get('data_element_class', self.data_element_class) return obj - def element_decode(self, data: ElementData, xsd_element: 'XsdElement', - xsd_type: Optional[BaseXsdType] = None, level: int = 0) -> 'DataElement': - data_element = self.data_element_class( + def get_data_element(self, data: ElementData, xsd_element: 'XsdElement', + xsd_type: Optional[BaseXsdType] = None) -> DataElement: + return self.data_element_class( tag=data.tag, value=data.text, nsmap=self.namespaces, xsd_element=xsd_element, xsd_type=xsd_type ) - data_element.attrib.update((k, v) for k, v in self.map_attributes(data.attributes)) + + def element_decode(self, data: ElementData, xsd_element: 'XsdElement', + xsd_type: Optional[BaseXsdType] = None, level: int = 0) -> 'DataElement': + data_element = self.get_data_element(data, xsd_element, xsd_type) + data_element.attrib.update(self.map_attributes(data.attributes)) if (xsd_type or xsd_element.type).model_group is not None: for name, value, _ in self.map_content(data.content): @@ -454,25 +462,12 @@ class DataBindingConverter(DataElementConverter): """ __slots__ = () - def element_decode(self, data: ElementData, xsd_element: 'XsdElement', - xsd_type: Optional[BaseXsdType] = None, level: int = 0) -> 'DataElement': + def get_data_element(self, data: ElementData, xsd_element: 'XsdElement', + xsd_type: Optional[BaseXsdType] = None) -> DataElement: cls = xsd_element.get_binding(self.data_element_class) - data_element = cls( + return cls( tag=data.tag, value=data.text, nsmap=self.namespaces, xsd_type=xsd_type ) - data_element.attrib.update((k, v) for k, v in self.map_attributes(data.attributes)) - - if 
(xsd_type or xsd_element.type).model_group is not None: - for name, value, _ in self.map_content(data.content): - if not name.isdigit(): - data_element.append(value) - else: - try: - data_element[-1].tail = value - except IndexError: - data_element.value = value - - return data_element From beafef3cb63af9c645463c687ca195a44aed9148 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Tue, 5 Jul 2022 22:31:30 +0200 Subject: [PATCH 06/17] Clean XPath interfaces with proper protocols --- tests/test_xpath.py | 49 +++++------------------------- xmlschema/resources.py | 11 ++++++- xmlschema/testing/_builders.py | 8 ++--- xmlschema/validators/assertions.py | 6 ++-- xmlschema/validators/elements.py | 8 ++--- xmlschema/validators/facets.py | 4 +-- xmlschema/validators/identities.py | 14 +++++++-- xmlschema/validators/schemas.py | 5 +-- xmlschema/validators/wildcards.py | 6 ++-- xmlschema/xpath.py | 47 ++++++++++++++-------------- 10 files changed, 72 insertions(+), 86 deletions(-) diff --git a/tests/test_xpath.py b/tests/test_xpath.py index 8b2a9fea..20082d53 100644 --- a/tests/test_xpath.py +++ b/tests/test_xpath.py @@ -13,10 +13,9 @@ import unittest import os import pathlib -from elementpath import XPath1Parser, XPath2Parser, Selector, \ - AttributeNode, TypedElement, ElementPathSyntaxError +from elementpath import XPath1Parser, XPath2Parser, Selector -from xmlschema import XMLSchema10, XMLSchema11, XsdElement, XsdAttribute +from xmlschema import XMLSchema10, XMLSchema11 from xmlschema.names import XSD_NAMESPACE from xmlschema.etree import ElementTree from xmlschema.xpath import XMLSchemaProxy, XPathElement @@ -118,41 +117,6 @@ def test_iter_atomic_types_method(self): self.assertIsInstance(xsd_type, (XsdAtomic, XsdAtomicRestriction)) self.assertGreater(k, 10) - def test_get_primitive_type_method(self): - schema_proxy = XMLSchemaProxy(self.xs3) - - string_type = self.xs3.meta_schema.types['string'] - xsd_type = self.xs3.types['list_of_strings'] - 
self.assertIs(schema_proxy.get_primitive_type(xsd_type), string_type) - - xsd_type = self.xs3.types['integer_or_float'] - self.assertIs(schema_proxy.get_primitive_type(xsd_type), xsd_type) - - @unittest.skip - def test_iter_schema_nodes_function(self): - vh_elements = set(e for e in self.xs1.maps.iter_components(XsdElement) - if e.target_namespace == self.xs1.target_namespace) - - self.assertEqual(set(iter_schema_nodes(self.xs1)), vh_elements | {self.xs1}) - self.assertEqual(set(iter_schema_nodes(self.xs1, with_root=False)), vh_elements) - - vh_nodes = set() - for node in self.xs1.maps.iter_components((XsdElement, XsdAttribute)): - if node.target_namespace != self.xs1.target_namespace: - continue - elif isinstance(node, XsdAttribute): - vh_nodes.add(AttributeNode(node.local_name, node)) - else: - vh_nodes.add(node) - - cars = self.xs1.elements['cars'] - car = self.xs1.find('//vh:car') - typed_cars = TypedElement(cars, cars.type, None) - self.assertListEqual(list(iter_schema_nodes(cars)), [cars, car]) - self.assertListEqual(list(iter_schema_nodes(typed_cars)), [cars, car]) - self.assertListEqual(list(iter_schema_nodes(cars, with_root=False)), [car]) - self.assertListEqual(list(iter_schema_nodes(typed_cars, with_root=False)), [car]) - class XPathElementTest(unittest.TestCase): @@ -221,6 +185,7 @@ def test_elem_name(self): class XMLSchemaXPathTest(unittest.TestCase): schema_class = XMLSchema10 + xs1: XMLSchema10 @classmethod def setUpClass(cls): @@ -230,10 +195,10 @@ def setUpClass(cls): cls.bikes = cls.xs1.elements['vehicles'].type.content[1] def test_xpath_wrong_syntax(self): - self.assertRaises(ElementPathSyntaxError, self.xs1.find, './*[') - self.assertRaises(ElementPathSyntaxError, self.xs1.find, './*)') - self.assertRaises(ElementPathSyntaxError, self.xs1.find, './*3') - self.assertRaises(ElementPathSyntaxError, self.xs1.find, './@3') + self.assertRaises(SyntaxError, self.xs1.find, './*[') + self.assertRaises(SyntaxError, self.xs1.find, './*)') + 
self.assertRaises(SyntaxError, self.xs1.find, './*3') + self.assertRaises(SyntaxError, self.xs1.find, './@3') def test_xpath_extra_spaces(self): self.assertTrue(self.xs1.find('./ *') is not None) diff --git a/xmlschema/resources.py b/xmlschema/resources.py index 81fb02da..de274c15 100644 --- a/xmlschema/resources.py +++ b/xmlschema/resources.py @@ -20,7 +20,8 @@ from urllib.parse import urlsplit, urlunsplit, unquote, quote_from_bytes from urllib.error import URLError -from elementpath import iter_select, XPathContext, XPath2Parser +from elementpath import iter_select, XPathContext, XPath2Parser, get_node_tree, \ + ElementNode, DocumentNode from elementpath.protocols import ElementProtocol from .exceptions import XMLSchemaTypeError, XMLSchemaValueError, XMLResourceError @@ -455,6 +456,7 @@ class XMLResource: # Protected attributes for data and resource location _source: XMLSourceType _root: ElementType + _xpath_root: Union[None, ElementNode, DocumentNode] = None _nsmap: Dict[ElementType, List[Tuple[str, str]]] _text: Optional[str] = None _url: Optional[str] = None @@ -525,6 +527,13 @@ def root(self) -> ElementType: """The XML tree root Element.""" return self._root + @property + def xpath_root(self) -> ElementType: + """The XPath tree root node.""" + if self._xpath_root is None: + self._xpath_root = get_node_tree(self._root) + return self._xpath_root + @property def text(self) -> Optional[str]: """The XML text source, `None` if it's not available.""" diff --git a/xmlschema/testing/_builders.py b/xmlschema/testing/_builders.py index f591c792..49bd0774 100644 --- a/xmlschema/testing/_builders.py +++ b/xmlschema/testing/_builders.py @@ -122,11 +122,11 @@ def check_xsd_file(self): self.assertTrue(isinstance(deserialized_schema, XMLSchemaBase), msg=xsd_file) self.assertEqual(schema.built, deserialized_schema.built, msg=xsd_file) - # XPath API tests + # XPath node tree tests if not inspect and not self.errors: - context = XPathSchemaContext(schema) - element_nodes = [x 
for x in context.root.iter() if hasattr(x, 'elem')] - descendants = [x for x in context.iter_descendants('descendant-or-self')] + xpath_root = schema.xpath_node + element_nodes = [x for x in xpath_root.iter() if hasattr(x, 'elem')] + descendants = [x for x in xpath_root.iter_descendants('descendant-or-self')] self.assertTrue(x in descendants for x in element_nodes) context_xsd_elements = [e.value for e in element_nodes] diff --git a/xmlschema/validators/assertions.py b/xmlschema/validators/assertions.py index 71b5ee6b..afc66c93 100644 --- a/xmlschema/validators/assertions.py +++ b/xmlschema/validators/assertions.py @@ -14,7 +14,7 @@ from ..names import XSD_ASSERT from ..aliases import ElementType, SchemaType, SchemaElementType, NamespacesType from ..translation import gettext as _ -from ..xpath import XMLSchemaProtocol, ElementProtocol, ElementPathMixin, XMLSchemaProxy +from ..xpath import XsdSchemaProtocol, XsdElementProtocol, ElementPathMixin, XMLSchemaProxy from .exceptions import XMLSchemaNotBuiltError, XMLSchemaValidationError from .xsdbase import XsdComponent @@ -158,6 +158,6 @@ def type(self) -> 'XsdComplexType': @property def xpath_proxy(self) -> 'XMLSchemaProxy': return XMLSchemaProxy( - schema=cast(XMLSchemaProtocol, self.schema), - base_element=cast(ElementProtocol, self) + schema=cast(XsdSchemaProtocol, self.schema), + base_element=cast(XsdElementProtocol, self) ) diff --git a/xmlschema/validators/elements.py b/xmlschema/validators/elements.py index 479645fc..d31227ea 100644 --- a/xmlschema/validators/elements.py +++ b/xmlschema/validators/elements.py @@ -30,7 +30,7 @@ raw_xml_encode, strictly_equal from .. 
import dataobjects from ..converters import XMLSchemaConverter -from ..xpath import XMLSchemaProtocol, ElementProtocol, XMLSchemaProxy, \ +from ..xpath import XsdSchemaProtocol, XsdElementProtocol, XMLSchemaProxy, \ ElementPathMixin, XPathElement from .exceptions import XMLSchemaValidationError, XMLSchemaTypeTableWarning @@ -384,8 +384,8 @@ def _parse_substitution_group(self, substitution_group: str) -> None: @property def xpath_proxy(self) -> XMLSchemaProxy: return XMLSchemaProxy( - schema=cast(XMLSchemaProtocol, self.schema), - base_element=cast(ElementProtocol, self) + schema=cast(XsdSchemaProtocol, self.schema), + base_element=cast(XsdElementProtocol, self) ) def build(self) -> None: @@ -659,7 +659,7 @@ def iter_decode(self, obj: ElementType, validation: str = 'lax', **kwargs: Any) if isinstance(identity.elements, tuple): continue # Skip unbuilt identities - context = XPathContext(self.schema, item=xpath_element) + context = XPathContext(self.schema.xpath_node, item=xpath_element) for e in identity.selector.token.select_results(context): if not isinstance(e, XsdElement): diff --git a/xmlschema/validators/facets.py b/xmlschema/validators/facets.py index d75d4b71..b9bcd043 100644 --- a/xmlschema/validators/facets.py +++ b/xmlschema/validators/facets.py @@ -17,7 +17,7 @@ from typing import TYPE_CHECKING, cast, Any, List, Optional, Pattern, Union, \ MutableSequence, overload, Tuple from elementpath import XPath2Parser, XPathContext, ElementPathError, \ - translate_pattern, RegexError + translate_pattern, RegexError, ElementNode from ..names import XSD_LENGTH, XSD_MIN_LENGTH, XSD_MAX_LENGTH, XSD_ENUMERATION, \ XSD_INTEGER, XSD_WHITE_SPACE, XSD_PATTERN, XSD_MAX_INCLUSIVE, XSD_MAX_EXCLUSIVE, \ @@ -777,7 +777,7 @@ class XsdAssertionFacet(XsdFacet): """ _ADMITTED_TAGS = {XSD_ASSERTION} - _root = etree_element('root') + _root = ElementNode(elem=etree_element('root')) def __repr__(self) -> str: return '%s(test=%r)' % (self.__class__.__name__, self.path) diff --git 
a/xmlschema/validators/identities.py b/xmlschema/validators/identities.py index c4edb62f..b7d2bce7 100644 --- a/xmlschema/validators/identities.py +++ b/xmlschema/validators/identities.py @@ -15,7 +15,7 @@ from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Pattern, \ Tuple, Union, Counter from elementpath import XPath2Parser, ElementPathError, XPathToken, XPathContext, \ - translate_pattern, datatypes + translate_pattern, datatypes, get_node_tree from ..exceptions import XMLSchemaTypeError, XMLSchemaValueError from ..names import XSD_QNAME, XSD_UNIQUE, XSD_KEY, XSD_KEYREF, XSD_SELECTOR, XSD_FIELD @@ -206,7 +206,13 @@ def build(self) -> None: self.fields = ref.fields self.ref = ref - context = XPathContext(self.schema, item=self.parent) # type: ignore + xpath_root = self.schema.xpath_node + try: + xpath_item = xpath_root.elements[self.parent] + except KeyError: + context = XPathContext(xpath_root, item=self.parent.xpath_node) # type: ignore + else: + context = XPathContext(xpath_root, item=xpath_item) # type: ignore self.elements = {} try: @@ -255,8 +261,10 @@ def get_fields(self, elem: Union[ElementType, 'XsdElement'], result: Any value: Union[AtomicValueType, None] + root_node = get_node_tree(elem) + for k, field in enumerate(self.fields): - context = XPathContext(elem) + context = XPathContext(root_node) result = field.token.get_results(context) if not result: diff --git a/xmlschema/validators/schemas.py b/xmlschema/validators/schemas.py index 526e4c03..0018c81e 100644 --- a/xmlschema/validators/schemas.py +++ b/xmlschema/validators/schemas.py @@ -54,7 +54,7 @@ from ..resources import is_local_url, is_remote_url, url_path_is_file, \ normalize_locations, fetch_resource, normalize_url, XMLResource from ..converters import XMLSchemaConverter -from ..xpath import XMLSchemaProtocol, XMLSchemaProxy, ElementPathMixin +from ..xpath import XsdSchemaProtocol, XMLSchemaProxy, ElementPathMixin from .. 
import dataobjects from .exceptions import XMLSchemaParseError, XMLSchemaValidationError, XMLSchemaEncodeError, \ @@ -570,7 +570,7 @@ def __len__(self) -> int: @property def xpath_proxy(self) -> XMLSchemaProxy: - return XMLSchemaProxy(cast(XMLSchemaProtocol, self)) + return XMLSchemaProxy(cast(XsdSchemaProtocol, self)) @property def xsd_version(self) -> str: @@ -996,6 +996,7 @@ def build(self) -> None: def clear(self) -> None: """Clears the schema's XSD global maps.""" self.maps.clear() + self._xpath_node = None @property def built(self) -> bool: diff --git a/xmlschema/validators/wildcards.py b/xmlschema/validators/wildcards.py index 963ea502..8a7c024d 100644 --- a/xmlschema/validators/wildcards.py +++ b/xmlschema/validators/wildcards.py @@ -21,7 +21,7 @@ DecodedValueType, EncodedValueType from ..translation import gettext as _ from ..helpers import get_namespace, raw_xml_encode -from ..xpath import XMLSchemaProtocol, ElementProtocol, XMLSchemaProxy, ElementPathMixin +from ..xpath import XsdSchemaProtocol, XsdElementProtocol, XMLSchemaProxy, ElementPathMixin from .xsdbase import ValidationMixin, XsdComponent from .particles import ParticleMixin from . 
import elements @@ -423,8 +423,8 @@ def __repr__(self) -> str: @property def xpath_proxy(self) -> XMLSchemaProxy: return XMLSchemaProxy( - schema=cast(XMLSchemaProtocol, self.schema), - base_element=cast(ElementProtocol, self) + schema=cast(XsdSchemaProtocol, self.schema), + base_element=cast(XsdElementProtocol, self) ) def _parse(self) -> None: diff --git a/xmlschema/xpath.py b/xmlschema/xpath.py index 41e19c36..5e256a9f 100644 --- a/xmlschema/xpath.py +++ b/xmlschema/xpath.py @@ -17,7 +17,7 @@ import re from elementpath import XPath2Parser, XPathSchemaContext, \ - AbstractSchemaProxy, protocols + AbstractSchemaProxy, protocols, SchemaNode, build_schema_node_tree from .exceptions import XMLSchemaValueError, XMLSchemaTypeError from .names import XSD_NAMESPACE @@ -25,20 +25,18 @@ from .helpers import get_qname, local_name, get_prefixed_qname if sys.version_info < (3, 8): - XMLSchemaProtocol = SchemaType - ElementProtocol = XPathElementType + XsdSchemaProtocol = SchemaType + XsdElementProtocol = XPathElementType XsdTypeProtocol = BaseXsdType else: from typing import runtime_checkable, Protocol XsdTypeProtocol = protocols.XsdTypeProtocol - - class XMLSchemaProtocol(protocols.XMLSchemaProtocol, Protocol): - attributes: Dict[str, Any] + XsdSchemaProtocol = protocols.XsdSchemaProtocol @runtime_checkable - class ElementProtocol(protocols.ElementProtocol, Protocol): - schema: XMLSchemaProtocol + class XsdElementProtocol(protocols.XsdElementProtocol, Protocol): + schema: XsdSchemaProtocol attributes: Dict[str, Any] @@ -49,12 +47,12 @@ class XMLSchemaProxy(AbstractSchemaProxy): """XPath schema proxy for the *xmlschema* library.""" _schema: SchemaType # type: ignore[assignment] - def __init__(self, schema: Optional[XMLSchemaProtocol] = None, - base_element: Optional[ElementProtocol] = None) -> None: + def __init__(self, schema: Optional[XsdSchemaProtocol] = None, + base_element: Optional[XsdElementProtocol] = None) -> None: if schema is None: from xmlschema import XMLSchema10 
- schema = cast(XMLSchemaProtocol, getattr(XMLSchema10, 'meta_schema', None)) + schema = cast(XsdSchemaProtocol, getattr(XMLSchema10, 'meta_schema', None)) super(XMLSchemaProxy, self).__init__(schema, base_element) @@ -81,7 +79,7 @@ def bind_parser(self, parser: XPath2Parser) -> None: def get_context(self) -> XPathSchemaContext: return XPathSchemaContext( - root=self._schema, # type: ignore[arg-type] + root=self._schema.xpath_node, namespaces=dict(self._schema.namespaces), item=self._base_element ) @@ -116,10 +114,6 @@ def iter_atomic_types(self) -> Iterator[XsdTypeProtocol]: hasattr(xsd_type, 'primitive_type'): yield cast(XsdTypeProtocol, xsd_type) - def get_primitive_type(self, xsd_type: XsdTypeProtocol) -> XsdTypeProtocol: - primitive_type = cast(BaseXsdType, xsd_type).root_type - return cast(XsdTypeProtocol, primitive_type) - E = TypeVar('E', bound='ElementPathMixin[Any]') @@ -137,6 +131,7 @@ class ElementPathMixin(Sequence[E]): attributes: Any = {} namespaces: Any = {} xpath_default_namespace = '' + _xpath_node: Optional[SchemaNode] = None @abstractmethod def __iter__(self) -> Iterator[E]: @@ -179,6 +174,14 @@ def xpath_proxy(self) -> XMLSchemaProxy: """Returns an XPath proxy instance bound with the schema.""" raise NotImplementedError + @property + def xpath_node(self) -> SchemaNode: + if self._xpath_node is None: + self._xpath_node = build_schema_node_tree( + cast(Union[XsdSchemaProtocol], self) + ) + return self._xpath_node + def _get_xpath_namespaces(self, namespaces: Optional[NamespacesType] = None) \ -> Dict[str, str]: """ @@ -214,7 +217,7 @@ def find(self, path: str, namespaces: Optional[NamespacesType] = None) -> Option path = _REGEX_TAG_POSITION.sub('', path.strip()) # Strips tags positions from path namespaces = self._get_xpath_namespaces(namespaces) parser = XPath2Parser(namespaces, strict=False) - context = XPathSchemaContext(self) # type: ignore[arg-type] + context = XPathSchemaContext(self.xpath_node) return cast(Optional[E], 
next(parser.parse(path).select_results(context), None)) @@ -227,10 +230,10 @@ def findall(self, path: str, namespaces: Optional[NamespacesType] = None) -> Lis :return: a list containing all matching XSD subelements in document order, an empty \ list is returned if there is no match. """ - path = _REGEX_TAG_POSITION.sub('', path.strip()) # Strips tags positions from path + path = _REGEX_TAG_POSITION.sub('', path.strip()) # Strip tags positions from path namespaces = self._get_xpath_namespaces(namespaces) parser = XPath2Parser(namespaces, strict=False) - context = XPathSchemaContext(self) # type: ignore[arg-type] + context = XPathSchemaContext(self.xpath_node) return cast(List[E], parser.parse(path).get_results(context)) @@ -245,7 +248,7 @@ def iterfind(self, path: str, namespaces: Optional[NamespacesType] = None) -> It path = _REGEX_TAG_POSITION.sub('', path.strip()) # Strip tags positions from path namespaces = self._get_xpath_namespaces(namespaces) parser = XPath2Parser(namespaces, strict=False) - context = XPathSchemaContext(self) # type: ignore[arg-type] + context = XPathSchemaContext(self.xpath_node) return cast(Iterator[E], parser.parse(path).select_results(context)) @@ -303,8 +306,8 @@ def __iter__(self) -> Iterator['XPathElement']: @property def xpath_proxy(self) -> XMLSchemaProxy: return XMLSchemaProxy( - cast(XMLSchemaProtocol, self.schema), - cast(ElementProtocol, self) + cast(XsdSchemaProtocol, self.schema), + cast(XsdElementProtocol, self) ) @property From e9c138d1f146542989065b1a8a67d5dbc40b9112 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Wed, 6 Jul 2022 14:59:04 +0200 Subject: [PATCH 07/17] Fix BadgerFishConverter decode of mixed content (issue #315) - Check also for #316 (is a misconfiguration, not a bug) - Fix etree_elements_assert_equal() testing helper --- .../issues/issue_315/issue_315-1.xml | 1 + .../issues/issue_315/issue_315-2.xml | 1 + .../issues/issue_315/issue_315-3.xml | 1 + .../issues/issue_315/issue_315-4.xml | 1 + 
.../issues/issue_315/issue_315-5.xml | 1 + .../issues/issue_315/issue_315_mixed.xsd | 16 +++ .../issues/issue_315/issue_315_simple.xsd | 13 +++ tests/test_converters.py | 97 ++++++++++++++++++- tests/test_etree.py | 2 +- xmlschema/converters/badgerfish.py | 6 ++ xmlschema/testing/_helpers.py | 14 ++- 11 files changed, 143 insertions(+), 10 deletions(-) create mode 100644 tests/test_cases/issues/issue_315/issue_315-1.xml create mode 100644 tests/test_cases/issues/issue_315/issue_315-2.xml create mode 100644 tests/test_cases/issues/issue_315/issue_315-3.xml create mode 100644 tests/test_cases/issues/issue_315/issue_315-4.xml create mode 100644 tests/test_cases/issues/issue_315/issue_315-5.xml create mode 100644 tests/test_cases/issues/issue_315/issue_315_mixed.xsd create mode 100644 tests/test_cases/issues/issue_315/issue_315_simple.xsd diff --git a/tests/test_cases/issues/issue_315/issue_315-1.xml b/tests/test_cases/issues/issue_315/issue_315-1.xml new file mode 100644 index 00000000..8b68b0b9 --- /dev/null +++ b/tests/test_cases/issues/issue_315/issue_315-1.xml @@ -0,0 +1 @@ +bar \ No newline at end of file diff --git a/tests/test_cases/issues/issue_315/issue_315-2.xml b/tests/test_cases/issues/issue_315/issue_315-2.xml new file mode 100644 index 00000000..276160bc --- /dev/null +++ b/tests/test_cases/issues/issue_315/issue_315-2.xml @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/tests/test_cases/issues/issue_315/issue_315-3.xml b/tests/test_cases/issues/issue_315/issue_315-3.xml new file mode 100644 index 00000000..eac26511 --- /dev/null +++ b/tests/test_cases/issues/issue_315/issue_315-3.xml @@ -0,0 +1 @@ +bar \ No newline at end of file diff --git a/tests/test_cases/issues/issue_315/issue_315-4.xml b/tests/test_cases/issues/issue_315/issue_315-4.xml new file mode 100644 index 00000000..387bfcc1 --- /dev/null +++ b/tests/test_cases/issues/issue_315/issue_315-4.xml @@ -0,0 +1 @@ +bar \ No newline at end of file diff --git 
a/tests/test_cases/issues/issue_315/issue_315-5.xml b/tests/test_cases/issues/issue_315/issue_315-5.xml new file mode 100644 index 00000000..4c2d3d56 --- /dev/null +++ b/tests/test_cases/issues/issue_315/issue_315-5.xml @@ -0,0 +1 @@ +bar \ No newline at end of file diff --git a/tests/test_cases/issues/issue_315/issue_315_mixed.xsd b/tests/test_cases/issues/issue_315/issue_315_mixed.xsd new file mode 100644 index 00000000..e1ee3c0c --- /dev/null +++ b/tests/test_cases/issues/issue_315/issue_315_mixed.xsd @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/test_cases/issues/issue_315/issue_315_simple.xsd b/tests/test_cases/issues/issue_315/issue_315_simple.xsd new file mode 100644 index 00000000..c8408338 --- /dev/null +++ b/tests/test_cases/issues/issue_315/issue_315_simple.xsd @@ -0,0 +1,13 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/tests/test_converters.py b/tests/test_converters.py index 5a47364e..0b93c007 100644 --- a/tests/test_converters.py +++ b/tests/test_converters.py @@ -18,7 +18,7 @@ lxml_etree = None from xmlschema import XMLSchema, XMLSchemaValidationError, fetch_namespaces -from xmlschema.etree import etree_element +from xmlschema.etree import etree_element, etree_tostring from xmlschema.dataobjects import DataElement from xmlschema.testing import etree_elements_assert_equal @@ -30,6 +30,10 @@ class TestConverters(unittest.TestCase): + col_xsd_filename: str + col_xml_filename: str + col_nsmap: dict + @classmethod def setUpClass(cls): cls.col_xsd_filename = cls.casepath('examples/collection/collection.xsd') @@ -482,9 +486,10 @@ def test_decode_encode_data_element_converter(self): # With ElementTree namespaces are not mapped obj2 = col_schema.decode(self.col_xml_root) - - # Equivalent if compared as Element trees (tag, text, attrib, tail) - self.assertIsNone(etree_elements_assert_equal(obj1, obj2)) + with self.assertRaises(AssertionError) as ctx: + # Equivalent if compared as Element trees 
(tag, text, attrib, tail) + etree_elements_assert_equal(obj1, obj2) + self.assertIn("attributes differ: {'xsi:schemaLocation'", str(ctx.exception)) self.assertIsNone(etree_elements_assert_equal( obj1, col_schema.decode(self.col_xml_root, namespaces=self.col_nsmap) @@ -502,6 +507,90 @@ def test_decode_encode_data_element_converter(self): root = col_schema.encode(obj2) # No namespace unmap is required self.assertIsNone(etree_elements_assert_equal(self.col_xml_root, root, strict=False)) + def test_simple_content__issue_315(self): + schema = XMLSchema(self.casepath('issues/issue_315/issue_315_simple.xsd')) + converters = ( + XMLSchemaConverter, XMLSchemaConverter(preserve_root=True), + BadgerFishConverter, AbderaConverter, JsonMLConverter, + UnorderedConverter, ColumnarConverter, DataElementConverter + ) + + for k in range(1, 6): + xml_filename = self.casepath(f'issues/issue_315/issue_315-{k}.xml') + if k < 3: + self.assertIsNone(schema.validate(xml_filename), xml_filename) + else: + self.assertFalse(schema.is_valid(xml_filename), xml_filename) + + for k in (1, 2): + xml_filename = self.casepath(f'issues/issue_315/issue_315-{k}.xml') + xml_tree = ElementTree.parse(xml_filename).getroot() + for converter in converters: + obj = schema.decode(xml_filename, converter=converter) + root = schema.encode(obj, converter=converter) + self.assertIsNone(etree_elements_assert_equal(xml_tree, root)) + + def test_mixed_content__issue_315(self): + schema = XMLSchema(self.casepath('issues/issue_315/issue_315_mixed.xsd')) + losslessly_converters = (JsonMLConverter, DataElementConverter) + default_converters = ( + XMLSchemaConverter(cdata_prefix='#'), + UnorderedConverter(cdata_prefix='#'), # BadgerFishConverter, ColumnarConverter, + ) + + for k in range(1, 6): + xml_filename = self.casepath(f'issues/issue_315/issue_315-{k}.xml') + self.assertIsNone(schema.validate(xml_filename), xml_filename) + + for k in range(1, 6): + xml_filename = self.casepath(f'issues/issue_315/issue_315-{k}.xml') 
+ xml_tree = ElementTree.parse(xml_filename).getroot() + for converter in losslessly_converters: + obj = schema.decode(xml_filename, converter=converter) + root = schema.encode(obj, converter=converter) + self.assertIsNone(etree_elements_assert_equal(xml_tree, root, strict=False)) + + for k in range(1, 6): + xml_filename = self.casepath(f'issues/issue_315/issue_315-{k}.xml') + xml_tree = ElementTree.parse(xml_filename).getroot() + for converter in default_converters: + obj = schema.decode(xml_filename, converter=converter) + root = schema.encode(obj, converter=converter, indent=0) + if k < 4: + self.assertIsNone(etree_elements_assert_equal(xml_tree, root, strict=False)) + continue + + if k == 4: + self.assertEqual(obj, {'@xmlns:tst': 'http://xmlschema.test/ns', + '@a1': 'foo', 'e2': [None, None], '#1': 'bar'}) + self.assertEqual(len(root), 2) + else: + self.assertEqual(obj, {'@xmlns:tst': 'http://xmlschema.test/ns', + '@a1': 'foo', 'e2': [None], '#1': 'bar'}) + self.assertEqual(len(root), 1) + + text = etree_tostring(root, namespaces={'tst': 'http://xmlschema.test/ns'}) + self.assertEqual(len(text.split('bar')), 2) + + for k in range(1, 6): + xml_filename = self.casepath(f'issues/issue_315/issue_315-{k}.xml') + xml_tree = ElementTree.parse(xml_filename).getroot() + obj = schema.decode(xml_filename, converter=BadgerFishConverter) + root = schema.encode(obj, converter=BadgerFishConverter, indent=0) + if k < 4: + self.assertIsNone(etree_elements_assert_equal(xml_tree, root, strict=False)) + continue + + if k == 4: + self.assertEqual(obj, {'@xmlns': {'tst': 'http://xmlschema.test/ns'}, + 'tst:e1': {'@a1': 'foo', 'e2': [{}, {}], '$1': 'bar'}}) + else: + self.assertEqual(obj, {'@xmlns': {'tst': 'http://xmlschema.test/ns'}, + 'tst:e1': {'@a1': 'foo', 'e2': [{}], '$1': 'bar'}}) + + text = etree_tostring(root, namespaces={'tst': 'http://xmlschema.test/ns'}) + self.assertEqual(len(text.split('bar')), 2) + if __name__ == '__main__': import platform diff --git 
a/tests/test_etree.py b/tests/test_etree.py index 924b9b67..b7b98f38 100644 --- a/tests/test_etree.py +++ b/tests/test_etree.py @@ -232,7 +232,7 @@ def test_etree_elements_assert_equal(self): e2 = ElementTree.XML('text\n\n') with self.assertRaises(AssertionError) as ctx: etree_elements_assert_equal(e1, e2) - self.assertIn("has lesser children than text \n\n') self.assertIsNone(etree_elements_assert_equal(e1, e2, strict=False)) diff --git a/xmlschema/converters/badgerfish.py b/xmlschema/converters/badgerfish.py index 3036c928..217e4737 100644 --- a/xmlschema/converters/badgerfish.py +++ b/xmlschema/converters/badgerfish.py @@ -59,6 +59,9 @@ def element_decode(self, data: ElementData, xsd_element: 'XsdElement', if xsd_group is None: if data.text is not None and data.text != '': result_dict['$'] = data.text + elif not data.content: + if data.text is not None and data.text != '': + result_dict['$1'] = data.text else: has_single_group = xsd_group.is_single() for name, value, xsd_child in self.map_content(data.content): @@ -125,6 +128,9 @@ def element_encode(self, obj: Any, xsd_element: 'XsdElement', level: int = 0) -> content: List[Tuple[Union[str, int], Any]] = [] attributes = {} + if isinstance(element_data, list): + print(element_data) + for name, value in element_data.items(): if name == '@xmlns': continue diff --git a/xmlschema/testing/_helpers.py b/xmlschema/testing/_helpers.py index 1e4c244b..8cd96f8f 100644 --- a/xmlschema/testing/_helpers.py +++ b/xmlschema/testing/_helpers.py @@ -35,7 +35,7 @@ def iter_nested_items(items: Union[Dict[Any, Any], List[Any]], def etree_elements_assert_equal(elem: etree_element, other: etree_element, strict: bool = True, skip_comments: bool = True, - unordered: bool = False) -> None: + unordered: bool = False, level: int = 0) -> None: """ Tests the equality of two XML Element trees. @@ -44,11 +44,15 @@ def etree_elements_assert_equal(elem: etree_element, other: etree_element, :param strict: asserts strictly equality. 
`True` for default. :param skip_comments: skip comments from comparison. :param unordered: children may have different order. + :param level: level of the examined elements (1 refer to roots). :raise: an AssertionError containing information about first difference encountered. """ children: Union[etree_element, List[etree_element]] - - if unordered: + if not level: + # incapsulate roots in lists in order to do a full check + children = [elem] + other_children = iter([other]) + elif unordered: children = sorted(elem, key=lambda x: '' if callable(x.tag) else x.tag) other_children = iter(sorted( other, key=lambda x: '' if callable(x.tag) else x.tag @@ -148,7 +152,7 @@ def etree_elements_assert_equal(elem: etree_element, other: etree_element, # Tail if e1.tail != e2.tail: message = "%r != %r: tails differ: %r != %r" % (e1, e2, e1.tail, e2.tail) - if strict: + if strict and level: raise AssertionError(message) elif e1.tail is None: if e2.tail is not None and e2.tail.strip(): @@ -159,7 +163,7 @@ def etree_elements_assert_equal(elem: etree_element, other: etree_element, elif e1.tail.strip() != e2.tail.strip(): raise AssertionError(message) - etree_elements_assert_equal(e1, e2, strict, skip_comments, unordered) + etree_elements_assert_equal(e1, e2, strict, skip_comments, unordered, level=level+1) try: next(other_children) From 34a06cc4b5a95b20b7e423d29ccf41a74f804989 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Fri, 8 Jul 2022 23:20:17 +0200 Subject: [PATCH 08/17] Add xpath_node properties with node builders --- tests/test_resources.py | 35 +------- xmlschema/helpers.py | 2 +- xmlschema/resources.py | 135 +++++++++++++++-------------- xmlschema/validators/assertions.py | 15 +++- xmlschema/validators/elements.py | 37 ++++++-- xmlschema/validators/identities.py | 55 +++++------- xmlschema/validators/schemas.py | 10 ++- xmlschema/validators/wildcards.py | 14 +++ xmlschema/xpath.py | 20 +++-- 9 files changed, 171 insertions(+), 152 deletions(-) diff --git 
a/tests/test_resources.py b/tests/test_resources.py index a7884c2f..eed38cfb 100644 --- a/tests/test_resources.py +++ b/tests/test_resources.py @@ -33,7 +33,7 @@ from xmlschema.names import XSD_NAMESPACE import xmlschema.resources from xmlschema.resources import is_url, is_local_url, is_remote_url, \ - url_path_is_file, normalize_locations, LazySelector + url_path_is_file, normalize_locations from xmlschema.testing import SKIP_REMOTE_TESTS @@ -1134,10 +1134,6 @@ def test_xml_resource_find(self): self.assertIs(resource.find('*/c2', nsmap=nsmap), resource.root[0][1]) self.assertListEqual(nsmap, [('tns2', 'http://example.com/ns2')]) - nsmap = [] - self.assertEqual(resource.find('*/c2/@x', nsmap=nsmap), '2') - self.assertListEqual(nsmap, []) - nsmap = [] ancestors = [] self.assertIs(resource.find('*/c2', nsmap=nsmap, ancestors=ancestors), @@ -1145,12 +1141,6 @@ def test_xml_resource_find(self): self.assertListEqual(nsmap, [('tns2', 'http://example.com/ns2')]) self.assertListEqual(ancestors, [resource.root, resource.root[0]]) - nsmap = [] - ancestors = [] - self.assertEqual(resource.find('*/c2/@x', nsmap=nsmap, ancestors=ancestors), '2') - self.assertListEqual(nsmap, []) - self.assertListEqual(ancestors, []) - nsmap = [] ancestors = [] self.assertIs(resource.find('.', nsmap=nsmap, ancestors=ancestors), @@ -1453,29 +1443,6 @@ def __getattr__(self, attr): self.assertEqual(set(resource.get_namespaces().keys()), {'vh', 'xsi'}) self.assertFalse(xml_file.closed) - def test_lazy_selector(self): - selector = LazySelector('./*') - self.assertEqual(repr(selector), "LazySelector(path='./*')") - - with self.assertRaises(SyntaxError): - LazySelector('self::*') - - root = ElementTree.XML('') - self.assertListEqual(selector.select(root), root[:]) - self.assertListEqual(list(selector.iter_select(root)), root[:]) - - selector = LazySelector('./b1/@c') - - with self.assertRaises(XMLResourceError) as ctx: - selector.select(root) - self.assertEqual("XPath expressions on lazy resources can 
" - "select only elements", str(ctx.exception)) - - with self.assertRaises(XMLResourceError) as ctx: - list(selector.iter_select(root)) - self.assertEqual("XPath expressions on lazy resources can " - "select only elements", str(ctx.exception)) - def test_parent_map(self): root = ElementTree.XML('') resource = XMLResource(root) diff --git a/xmlschema/helpers.py b/xmlschema/helpers.py index 9877894a..b48f6ff6 100644 --- a/xmlschema/helpers.py +++ b/xmlschema/helpers.py @@ -184,7 +184,7 @@ def etree_iterpath(elem: ElementType, for child in elem: if callable(child.tag): - continue # Skip lxml comments + continue # Skip comments and PIs child_name = child.tag if namespaces is None else get_prefixed_qname(child.tag, namespaces) if path == '/': diff --git a/xmlschema/resources.py b/xmlschema/resources.py index de274c15..e2323e35 100644 --- a/xmlschema/resources.py +++ b/xmlschema/resources.py @@ -20,69 +20,21 @@ from urllib.parse import urlsplit, urlunsplit, unquote, quote_from_bytes from urllib.error import URLError -from elementpath import iter_select, XPathContext, XPath2Parser, get_node_tree, \ - ElementNode, DocumentNode -from elementpath.protocols import ElementProtocol +from elementpath import XPathContext, XPath2Parser, ElementNode, LazyElementNode, \ + DocumentNode, build_lxml_node_tree, build_node_tree from .exceptions import XMLSchemaTypeError, XMLSchemaValueError, XMLResourceError from .names import XML_NAMESPACE from .etree import ElementTree, PyElementTree, SafeXMLParser, etree_tostring from .aliases import ElementType, ElementTreeType, NamespacesType, XMLSourceType, \ NormalizedLocationsType, LocationsType, NsmapType, ParentMapType -from .helpers import get_namespace, is_etree_element, is_etree_document, \ - etree_iter_location_hints +from .helpers import get_namespace, is_etree_document, etree_iter_location_hints DEFUSE_MODES = frozenset(('never', 'remote', 'nonlocal', 'always')) SECURITY_MODES = frozenset(('all', 'remote', 'local', 'sandbox', 'none')) - 
-### -# Restricted XPath parser for XML resources -LAZY_XML_XPATH_SYMBOLS = frozenset(( - 'position', 'last', 'not', 'and', 'or', '!=', '<=', '>=', '(', ')', 'text', - '[', ']', '.', ',', '/', '|', '*', '=', '<', '>', ':', '@', '(end)', - '(unknown)', '(invalid)', '(name)', '(string)', '(float)', '(decimal)', - '(integer)' -)) - DRIVE_LETTERS = frozenset(string.ascii_letters) -class LazyXPath2Parser(XPath2Parser): - symbol_table = { - k: v for k, v in XPath2Parser.symbol_table.items() # type: ignore[misc] - if k in LAZY_XML_XPATH_SYMBOLS - } - SYMBOLS = LAZY_XML_XPATH_SYMBOLS - - -class LazySelector: - """A limited XPath selector class for lazy XML resources.""" - - def __init__(self, path: str, namespaces: Optional[NamespacesType] = None) -> None: - self.parser = LazyXPath2Parser(namespaces, strict=False) - self.path = path - self.root_token = self.parser.parse(path) - - def __repr__(self) -> str: - return '%s(path=%r)' % (self.__class__.__name__, self.path) - - def select(self, root: ElementProtocol, **kwargs: Any) -> List[ElementProtocol]: - context = XPathContext(root, **kwargs) - results = self.root_token.get_results(context) - if not isinstance(results, list) or any(not is_etree_element(x) for x in results): - msg = "XPath expressions on lazy resources can select only elements" - raise XMLResourceError(msg) - return results - - def iter_select(self, root: ElementProtocol, **kwargs: Any) -> Iterator[ElementProtocol]: - context = XPathContext(root, **kwargs) - for elem in self.root_token.select_results(context): - if not is_etree_element(elem): - msg = "XPath expressions on lazy resources can select only elements" - raise XMLResourceError(msg) - yield cast(ElementProtocol, elem) - - ### # URL normalization (that fixes many headaches :) class _PurePath(PurePath): @@ -527,13 +479,6 @@ def root(self) -> ElementType: """The XML tree root Element.""" return self._root - @property - def xpath_root(self) -> ElementType: - """The XPath tree root node.""" - if 
self._xpath_root is None: - self._xpath_root = get_node_tree(self._root) - return self._xpath_root - @property def text(self) -> Optional[str]: """The XML text source, `None` if it's not available.""" @@ -649,12 +594,12 @@ def _lazy_iterparse(self, resource: IO[AnyStr], nsmap: Optional[NsmapType] = Non nsmap_update = False _root = cast(Optional[ElementType], getattr(self, '_root', None)) - try: for event, node in tree_iterator: if event == 'start': if not root_started: self._root = node + self._xpath_root = None root_started = True if nsmap_update and isinstance(nsmap, dict): for prefix, uri in _nsmap: @@ -722,6 +667,7 @@ def _parse(self, resource: IO[AnyStr]) -> None: assert elem is not None self._root = elem + self._xpath_root = None self._nsmap = namespaces def _parse_resource(self, resource: IO[AnyStr], @@ -830,12 +776,13 @@ def parse(self, source: XMLSourceType, lazy: Union[bool, int] = False) -> None: "or a file-like object is required." % type(source) ) + self._xpath_root = None self._text = self._url = None self._lazy = False self._nsmap = {} # TODO for Python 3.8+: need a Protocol for checking this with isinstance() - if hasattr(self._root, 'nsmap'): + if hasattr(self._root, 'xpath'): nsmap = [] lxml_nsmap = None for elem in cast(Any, self._root.iter()): @@ -862,6 +809,45 @@ def parent_map(self) -> Dict[ElementType, Optional[ElementType]]: self._parent_map[self._root] = None return self._parent_map + def _build_node_tree(self, namespaces: Optional[NamespacesType] = None) \ + -> Union[DocumentNode, ElementNode]: + """Build a node tree for non-lazy resources.""" + if hasattr(self._root, 'xpath'): + return build_lxml_node_tree(self._root) + else: + try: + _nsmap = self._nsmap[self._root] + except KeyError: + # A resource based on an ElementTree structure (no namespace maps) + return build_node_tree(self._root, namespaces) + else: + _namespaces = {pfx: uri for pfx, uri in _nsmap} + node_tree = build_node_tree(self._root, _namespaces) + + # Update namespace 
maps + for node in node_tree.iter_descendants(with_self=False): + if isinstance(node, ElementNode): + elem_nsmap = self._nsmap[node.elem] + if _nsmap is not elem_nsmap: + _nsmap = elem_nsmap + _namespaces = {pfx: uri for pfx, uri in _nsmap} + node.nsmap = _namespaces + + return node_tree + + def get_xpath_node(self, elem: ElementType, *args) -> ElementNode: + if self._lazy: + return LazyElementNode(elem) + + if self._xpath_root is None: + self._xpath_root = self._build_node_tree() + + try: + return self._xpath_root.elements[elem] + except KeyError: + print(self.url) + raise + def get_nsmap(self, elem: ElementType) -> List[Tuple[str, str]]: """ Returns a list of couples with the namespace (nsmap) map of the element. @@ -1160,6 +1146,19 @@ def iter_depth(self, mode: int = 1, nsmap: Optional[NsmapType] = None, if self._source is not resource: resource.close() + def _iterfind(self, path: str, namespaces: Optional[NamespacesType] = None) -> Iterator[ElementType]: + parser = XPath2Parser(namespaces, strict=False) + token = parser.parse(path) + if self._xpath_root is None: + self._xpath_root = self._build_node_tree(namespaces) + + context = XPathContext(self._xpath_root) + for item in token.select(context): + if not isinstance(item, ElementNode): + msg = "XPath expressions on XML resources can select only elements" + raise XMLResourceError(msg) + yield item.elem + def iterfind(self, path: str, namespaces: Optional[NamespacesType] = None, nsmap: Optional[NsmapType] = None, @@ -1178,7 +1177,6 @@ def iterfind(self, path: str, selector: Any if self._lazy: - selector = LazySelector(path, namespaces) path = path.replace(' ', '').replace('./', '') resource = self.open() level = 0 @@ -1189,6 +1187,8 @@ def iterfind(self, path: str, subtree_level = path.count('/') - 1 else: subtree_level = path.count('/') + 1 + if path.startswith('/'): + path = '.' 
+ path try: for event, node in self._lazy_iterparse(resource, nsmap): @@ -1201,7 +1201,7 @@ def iterfind(self, path: str, if not level: if subtree_level: pass - elif select_all or node in selector.select(self._root): + elif select_all or node in self._root.findall(path, namespaces): yield node elif not subtree_level: continue @@ -1209,10 +1209,11 @@ def iterfind(self, path: str, if ancestors is not None and level < subtree_level: ancestors.pop() continue # pragma: no cover - elif select_all or node in selector.select(self._root): + elif select_all or node in self._root.findall(path, namespaces): yield node del node[:] # delete children, keep attributes, text and tail. + self._xpath_root = None # A rebuild of XPath tree is needed finally: if self._source is not resource: @@ -1220,14 +1221,14 @@ def iterfind(self, path: str, else: if ancestors is None: - selector = iter_select + selector = self._iterfind else: parent_map = self.parent_map ancestors.clear() def selector(*args: Any, **kwargs: Any) -> Iterator[Any]: assert ancestors is not None - for e in iter_select(*args, **kwargs): + for e in self._iterfind(*args, **kwargs): if e is self._root: ancestors.clear() else: @@ -1248,10 +1249,10 @@ def selector(*args: Any, **kwargs: Any) -> Iterator[Any]: yield e if not self._nsmap or nsmap is None: - yield from selector(self._root, path, namespaces, strict=False) + yield from selector(path, namespaces) else: _nsmap = None - for elem in selector(self._root, path, namespaces, strict=False): + for elem in selector(path, namespaces): try: if _nsmap is not self._nsmap[elem]: _nsmap = self._nsmap[elem] diff --git a/xmlschema/validators/assertions.py b/xmlschema/validators/assertions.py index afc66c93..02c2efa0 100644 --- a/xmlschema/validators/assertions.py +++ b/xmlschema/validators/assertions.py @@ -9,7 +9,8 @@ # import threading from typing import TYPE_CHECKING, cast, Any, Dict, Iterator, Optional, Union -from elementpath import XPath2Parser, XPathContext, XPathToken, 
ElementPathError +from elementpath import ElementPathError, XPath2Parser, XPathContext, XPathToken, \ + SchemaElementNode, build_schema_node_tree from ..names import XSD_ASSERT from ..aliases import ElementType, SchemaType, SchemaElementType, NamespacesType @@ -161,3 +162,15 @@ def xpath_proxy(self) -> 'XMLSchemaProxy': schema=cast(XsdSchemaProtocol, self.schema), base_element=cast(XsdElementProtocol, self) ) + + @property + def xpath_node(self) -> SchemaElementNode: + schema_node = self.schema.xpath_node + try: + return cast(SchemaElementNode, schema_node.elements[self]) + except KeyError: + return build_schema_node_tree( + root=cast(XsdElementProtocol, self), + elements=schema_node.elements, + global_elements=schema_node.children, + ) diff --git a/xmlschema/validators/elements.py b/xmlschema/validators/elements.py index d31227ea..def4b19e 100644 --- a/xmlschema/validators/elements.py +++ b/xmlschema/validators/elements.py @@ -11,10 +11,13 @@ This module contains classes for XML Schema elements, complex types and model groups. 
""" import warnings +from copy import copy from decimal import Decimal from types import GeneratorType from typing import TYPE_CHECKING, cast, Any, Dict, Iterator, List, Optional, Tuple, Type, Union -from elementpath import XPath2Parser, ElementPathError, XPathContext, XPathToken + +from elementpath import XPath2Parser, ElementPathError, XPathContext, XPathToken, \ + LazyElementNode, SchemaElementNode, build_schema_node_tree from elementpath.datatypes import AbstractDateTime, Duration, AbstractBinary from ..exceptions import XMLSchemaTypeError, XMLSchemaValueError @@ -32,6 +35,7 @@ from ..converters import XMLSchemaConverter from ..xpath import XsdSchemaProtocol, XsdElementProtocol, XMLSchemaProxy, \ ElementPathMixin, XPathElement +from ..resources import XMLResource from .exceptions import XMLSchemaValidationError, XMLSchemaTypeTableWarning from .helpers import get_xsd_derivation_attribute @@ -388,9 +392,21 @@ def xpath_proxy(self) -> XMLSchemaProxy: base_element=cast(XsdElementProtocol, self) ) + @property + def xpath_node(self) -> SchemaElementNode: + schema_node = self.schema.xpath_node + try: + return cast(SchemaElementNode, schema_node.elements[self]) + except KeyError: + return build_schema_node_tree( + root=cast(XsdElementProtocol, self), + elements=schema_node.elements, + global_elements=schema_node.children, + ) + def build(self) -> None: if self._build: - return + return None self._build = True self._parse() @@ -827,6 +843,8 @@ def iter_decode(self, obj: ElementType, validation: str = 'lax', **kwargs: Any) if content is not None: del content + element_node = None + # Collect field values for identities that refer to this element. 
for identity, counter in identities.items(): if not counter.enabled or not identity.elements: @@ -843,17 +861,26 @@ def iter_decode(self, obj: ElementType, validation: str = 'lax', **kwargs: Any) if xsd_type is self.type: xsd_fields = identity.elements[xsd_element] if xsd_fields is None: - xsd_fields = identity.get_fields(xsd_element) + xsd_fields = identity.get_fields(xsd_element.xpath_node) identity.elements[xsd_element] = xsd_fields else: xsd_element = cast(XsdElement, self.copy()) xsd_element.type = xsd_type - xsd_fields = identity.get_fields(xsd_element) + xsd_fields = identity.get_fields(xsd_element.xpath_node) if all(x is None for x in xsd_fields): continue decoders = cast(Tuple[XsdAttribute, ...], xsd_fields) - fields = identity.get_fields(obj, namespaces, decoders=decoders) + + if element_node is None: + try: + resource = cast(XMLResource, kwargs['source']) + except KeyError: + element_node = LazyElementNode(obj, nsmap=copy(namespaces)) + else: + element_node = resource.get_xpath_node(obj, identity.fields) + + fields = identity.get_fields(element_node, namespaces, decoders=decoders) except (XMLSchemaValueError, XMLSchemaTypeError) as err: yield self.validation_error(validation, err, obj, **kwargs) else: diff --git a/xmlschema/validators/identities.py b/xmlschema/validators/identities.py index b7d2bce7..2f31178e 100644 --- a/xmlschema/validators/identities.py +++ b/xmlschema/validators/identities.py @@ -15,7 +15,7 @@ from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Pattern, \ Tuple, Union, Counter from elementpath import XPath2Parser, ElementPathError, XPathToken, XPathContext, \ - translate_pattern, datatypes, get_node_tree + ElementNode, translate_pattern, datatypes from ..exceptions import XMLSchemaTypeError, XMLSchemaValueError from ..names import XSD_QNAME, XSD_UNIQUE, XSD_KEY, XSD_KEYREF, XSD_SELECTOR, XSD_FIELD @@ -206,14 +206,7 @@ def build(self) -> None: self.fields = ref.fields self.ref = ref - xpath_root = 
self.schema.xpath_node - try: - xpath_item = xpath_root.elements[self.parent] - except KeyError: - context = XPathContext(xpath_root, item=self.parent.xpath_node) # type: ignore - else: - context = XPathContext(xpath_root, item=xpath_item) # type: ignore - + context = XPathContext(self.schema.xpath_node, item=self.parent.xpath_node) self.elements = {} try: for e in self.selector.token.select_results(context): @@ -246,25 +239,36 @@ def build(self) -> None: def built(self) -> bool: return not isinstance(self.elements, tuple) - def get_fields(self, elem: Union[ElementType, 'XsdElement'], + def get_fields(self, element_node: ElementNode, namespaces: Optional[NamespacesType] = None, decoders: Optional[Tuple[XsdAttribute, ...]] = None) -> IdentityCounterType: """ Get fields for a schema or instance context element. - :param elem: an Element or an XsdElement + :param element_node: an Element or an XsdElement :param namespaces: is an optional mapping from namespace prefix to URI. :param decoders: context schema fields decoders. :return: a tuple with field values. An empty field is replaced by `None`. 
""" fields: List[IdentityFieldItemType] = [] + def append_fields(): + if isinstance(value, list): + fields.append(tuple(value)) + elif isinstance(value, bool): + fields.append((value, bool)) + elif not isinstance(value, float): + fields.append(value) + elif math.isnan(value): + fields.append(('nan', float)) + else: + fields.append((value, float)) + result: Any value: Union[AtomicValueType, None] - root_node = get_node_tree(elem) for k, field in enumerate(self.fields): - context = XPathContext(root_node) + context = XPathContext(element_node) result = field.token.get_results(context) if not result: @@ -274,20 +278,10 @@ def get_fields(self, elem: Union[ElementType, 'XsdElement'], if decoders[k].type.root_type.name == XSD_QNAME: value = get_extended_qname(value, namespaces) - if isinstance(value, list): - fields.append(tuple(value)) - elif isinstance(value, bool): - fields.append((value, bool)) - elif not isinstance(value, float): - fields.append(value) - elif math.isnan(value): - fields.append(('nan', float)) - else: - fields.append((value, float)) - + append_fields() continue - if not isinstance(self, XsdKey) or 'ref' in elem.attrib and \ + if not isinstance(self, XsdKey) or 'ref' in element_node.elem.attrib and \ self.schema.meta_schema is None and self.schema.XSD_VERSION != '1.0': fields.append(None) elif field.target_namespace not in self.maps.namespaces: @@ -311,16 +305,7 @@ def get_fields(self, elem: Union[ElementType, 'XsdElement'], elif isinstance(value, datatypes.QName): value = value.expanded_name - if isinstance(value, list): - fields.append(tuple(value)) - elif isinstance(value, bool): - fields.append((value, bool)) - elif not isinstance(value, float): - fields.append(value) - elif math.isnan(value): - fields.append(('nan', float)) - else: - fields.append((value, float)) + append_fields() else: msg = _("%r field selects multiple values!") raise XMLSchemaValueError(msg % field) diff --git a/xmlschema/validators/schemas.py b/xmlschema/validators/schemas.py 
index 0018c81e..b52bce7c 100644 --- a/xmlschema/validators/schemas.py +++ b/xmlschema/validators/schemas.py @@ -32,7 +32,7 @@ from typing import cast, Callable, ItemsView, List, Optional, Dict, Any, \ Set, Union, Tuple, Type, Iterator, Counter -from elementpath import XPathToken +from elementpath import XPathToken, SchemaElementNode, build_schema_node_tree from ..exceptions import XMLSchemaTypeError, XMLSchemaKeyError, XMLSchemaRuntimeError, \ XMLSchemaValueError, XMLSchemaNamespaceError @@ -572,6 +572,14 @@ def __len__(self) -> int: def xpath_proxy(self) -> XMLSchemaProxy: return XMLSchemaProxy(cast(XsdSchemaProtocol, self)) + @property + def xpath_node(self) -> SchemaElementNode: + if self._xpath_node is None: + self._xpath_node = build_schema_node_tree( + cast(Union[XsdSchemaProtocol], self) + ) + return self._xpath_node + @property def xsd_version(self) -> str: """Compatibility property that returns the class attribute XSD_VERSION.""" diff --git a/xmlschema/validators/wildcards.py b/xmlschema/validators/wildcards.py index 8a7c024d..3e558e09 100644 --- a/xmlschema/validators/wildcards.py +++ b/xmlschema/validators/wildcards.py @@ -13,6 +13,8 @@ from typing import cast, Any, Callable, Dict, Iterable, Iterator, List, Optional, \ Tuple, Union, Counter +from elementpath import SchemaElementNode, build_schema_node_tree + from ..exceptions import XMLSchemaValueError from ..names import XSI_NAMESPACE, XSD_ANY, XSD_ANY_ATTRIBUTE, \ XSD_OPEN_CONTENT, XSD_DEFAULT_OPEN_CONTENT, XSI_TYPE @@ -427,6 +429,18 @@ def xpath_proxy(self) -> XMLSchemaProxy: base_element=cast(XsdElementProtocol, self) ) + @property + def xpath_node(self) -> SchemaElementNode: + schema_node = self.schema.xpath_node + try: + return cast(SchemaElementNode, schema_node.elements[self]) + except KeyError: + return build_schema_node_tree( + root=cast(XsdElementProtocol, self), + elements=schema_node.elements, + global_elements=schema_node.children, + ) + def _parse(self) -> None: super(XsdAnyElement, 
self)._parse() self._parse_particle(self.elem) diff --git a/xmlschema/xpath.py b/xmlschema/xpath.py index 5e256a9f..70b154ea 100644 --- a/xmlschema/xpath.py +++ b/xmlschema/xpath.py @@ -17,7 +17,7 @@ import re from elementpath import XPath2Parser, XPathSchemaContext, \ - AbstractSchemaProxy, protocols, SchemaNode, build_schema_node_tree + AbstractSchemaProxy, protocols, LazyElementNode, SchemaElementNode from .exceptions import XMLSchemaValueError, XMLSchemaTypeError from .names import XSD_NAMESPACE @@ -131,7 +131,7 @@ class ElementPathMixin(Sequence[E]): attributes: Any = {} namespaces: Any = {} xpath_default_namespace = '' - _xpath_node: Optional[SchemaNode] = None + _xpath_node: Optional[SchemaElementNode] = None @abstractmethod def __iter__(self) -> Iterator[E]: @@ -175,12 +175,9 @@ def xpath_proxy(self) -> XMLSchemaProxy: raise NotImplementedError @property - def xpath_node(self) -> SchemaNode: - if self._xpath_node is None: - self._xpath_node = build_schema_node_tree( - cast(Union[XsdSchemaProtocol], self) - ) - return self._xpath_node + def xpath_node(self) -> SchemaElementNode: + """Returns an XPath node for applying selectors on XSD schema/component.""" + raise NotImplementedError def _get_xpath_namespaces(self, namespaces: Optional[NamespacesType] = None) \ -> Dict[str, str]: @@ -293,6 +290,7 @@ class XPathElement(ElementPathMixin['XPathElement']): """An element node for making XPath operations on schema types.""" name: str parent = None + _xpath_node: Optional[LazyElementNode] def __init__(self, name: str, xsd_type: BaseXsdType) -> None: self.name = name @@ -310,6 +308,12 @@ def xpath_proxy(self) -> XMLSchemaProxy: cast(XsdElementProtocol, self) ) + @property + def xpath_node(self) -> LazyElementNode: + if self._xpath_node is None: + self._xpath_node = LazyElementNode(self) + return self._xpath_node + @property def schema(self) -> SchemaType: return self.type.schema From 8fdc762af068114df85a4f2f971fbc60683fab0c Mon Sep 17 00:00:00 2001 From: Davide 
Brunato Date: Sat, 9 Jul 2022 23:19:48 +0200 Subject: [PATCH 09/17] Refactor XMLResource - Optimize iterfind() using only XPath 2.0 selectors (slower but compatible with XPath 2.0 used on XSD schemas) - Fix XPath dynamic context creation for assertions --- xmlschema/resources.py | 154 ++++++++++++++--------------- xmlschema/validators/assertions.py | 12 ++- xmlschema/validators/elements.py | 2 +- xmlschema/validators/schemas.py | 2 +- 4 files changed, 86 insertions(+), 84 deletions(-) diff --git a/xmlschema/resources.py b/xmlschema/resources.py index e2323e35..14a81123 100644 --- a/xmlschema/resources.py +++ b/xmlschema/resources.py @@ -20,8 +20,8 @@ from urllib.parse import urlsplit, urlunsplit, unquote, quote_from_bytes from urllib.error import URLError -from elementpath import XPathContext, XPath2Parser, ElementNode, LazyElementNode, \ - DocumentNode, build_lxml_node_tree, build_node_tree +from elementpath import XPathToken, XPathContext, XPath2Parser, ElementNode, \ + LazyElementNode, DocumentNode, build_lxml_node_tree, build_node_tree from .exceptions import XMLSchemaTypeError, XMLSchemaValueError, XMLResourceError from .names import XML_NAMESPACE @@ -34,6 +34,8 @@ SECURITY_MODES = frozenset(('all', 'remote', 'local', 'sandbox', 'none')) DRIVE_LETTERS = frozenset(string.ascii_letters) +ResourceNodeType = Union[ElementNode, LazyElementNode, DocumentNode] + ### # URL normalization (that fixes many headaches :) @@ -543,6 +545,23 @@ def _access_control(self, url: str) -> None: if not url.startswith(normalize_url(self._base_url)): raise XMLResourceError("block access to out of sandbox file {}".format(url)) + def _track_nsmap(self, elements, nsmap): + _nsmap = None + for elem in elements: + try: + if _nsmap is not self._nsmap[elem]: + _nsmap = self._nsmap[elem] + if isinstance(nsmap, list): + nsmap.clear() + nsmap.extend(_nsmap) + else: + for prefix, uri in _nsmap: + self._update_nsmap(nsmap, prefix, uri) + except KeyError: + pass + + yield elem + def 
_update_nsmap(self, nsmap: MutableMapping[str, str], prefix: str, uri: str) -> None: if not prefix: if not uri: @@ -594,12 +613,13 @@ def _lazy_iterparse(self, resource: IO[AnyStr], nsmap: Optional[NsmapType] = Non nsmap_update = False _root = cast(Optional[ElementType], getattr(self, '_root', None)) + _xpath_root = self._xpath_root try: for event, node in tree_iterator: if event == 'start': if not root_started: self._root = node - self._xpath_root = None + self._xpath_root = LazyElementNode(self._root) root_started = True if nsmap_update and isinstance(nsmap, dict): for prefix, uri in _nsmap: @@ -619,6 +639,7 @@ def _lazy_iterparse(self, resource: IO[AnyStr], nsmap: Optional[NsmapType] = Non except Exception as err: if _root is not None: self._root = _root + self._xpath_root = _xpath_root if isinstance(err, PyElementTree.ParseError): raise ElementTree.ParseError(str(err)) from None raise @@ -835,18 +856,24 @@ def _build_node_tree(self, namespaces: Optional[NamespacesType] = None) \ return node_tree - def get_xpath_node(self, elem: ElementType, *args) -> ElementNode: - if self._lazy: - return LazyElementNode(elem) - + @property + def xpath_root(self) -> Union[ElementNode, DocumentNode]: + """The XPath root node.""" if self._xpath_root is None: self._xpath_root = self._build_node_tree() + return self._xpath_root + def get_xpath_node(self, elem: ElementType) -> ElementNode: + """ + Returns an XPath node for the element. If the element does not belong to + the XML tree or if the resource is lazy, it returns a lazy element node. 
+ """ + if self._lazy: + return LazyElementNode(elem) try: - return self._xpath_root.elements[elem] + return self.xpath_root.elements[elem] except KeyError: - print(self.url) - raise + return LazyElementNode(elem) def get_nsmap(self, elem: ElementType) -> List[Tuple[str, str]]: """ @@ -1054,21 +1081,7 @@ def iter(self, tag: Optional[str] = None, nsmap: Optional[NsmapType] = None) \ elif not self._nsmap or nsmap is None: yield from self._root.iter(tag) else: - _nsmap = None - for elem in self._root.iter(tag): - try: - if _nsmap is not self._nsmap[elem]: - _nsmap = self._nsmap[elem] - if isinstance(nsmap, list): - nsmap.clear() - nsmap.extend(_nsmap) - else: - for prefix, uri in _nsmap: - self._update_nsmap(nsmap, prefix, uri) - except KeyError: - pass - - yield elem + yield from self._track_nsmap(self._root.iter(tag), nsmap) def iter_location_hints(self, tag: Optional[str] = None) -> Iterator[Tuple[str, str]]: """ @@ -1146,17 +1159,36 @@ def iter_depth(self, mode: int = 1, nsmap: Optional[NsmapType] = None, if self._source is not resource: resource.close() - def _iterfind(self, path: str, namespaces: Optional[NamespacesType] = None) -> Iterator[ElementType]: - parser = XPath2Parser(namespaces, strict=False) - token = parser.parse(path) - if self._xpath_root is None: - self._xpath_root = self._build_node_tree(namespaces) + @staticmethod + def _select_elements(token: XPathToken, node: ResourceNodeType) -> Iterator[ElementType]: + context = XPathContext(node) + for item in token.select(context): + if not isinstance(item, ElementNode): + msg = "XPath expressions on XML resources can select only elements" + raise XMLResourceError(msg) + yield item.elem - context = XPathContext(self._xpath_root) + def _select_ancestors(self, token: XPathToken, node: ResourceNodeType, + ancestors: List[ElementType]) -> Iterator[ElementType]: + context = XPathContext(node) for item in token.select(context): if not isinstance(item, ElementNode): msg = "XPath expressions on XML resources can 
select only elements" raise XMLResourceError(msg) + elif item.elem is self._root: + ancestors.clear() + else: + _ancestors = [] + parent = item.parent + while parent is not None: + if parent is not None: + _ancestors.append(parent.elem) + parent = parent.parent + + if _ancestors: + ancestors.clear() + ancestors.extend(reversed(_ancestors)) + yield item.elem def iterfind(self, path: str, @@ -1166,7 +1198,8 @@ def iterfind(self, path: str, """ Apply XPath selection to XML resource that yields full subtrees. - :param path: an XPath expression to select element nodes. + :param path: an XPath 2.0 expression that selects element nodes. \ + Selecting other values or nodes raise an error. :param namespaces: an optional mapping from namespace prefixes to URIs \ used for parsing the XPath expression. :param nsmap: provide a list/dict for tracking the namespaces of yielded \ @@ -1174,6 +1207,8 @@ def iterfind(self, path: str, the tracking is on the whole tree, renaming prefixes in case of conflicts. :param ancestors: provide a list for tracking the ancestors of yielded elements. """ + parser = XPath2Parser(namespaces, strict=False) + token = parser.parse(path) selector: Any if self._lazy: @@ -1187,8 +1222,6 @@ def iterfind(self, path: str, subtree_level = path.count('/') - 1 else: subtree_level = path.count('/') + 1 - if path.startswith('/'): - path = '.' 
+ path try: for event, node in self._lazy_iterparse(resource, nsmap): @@ -1201,7 +1234,8 @@ def iterfind(self, path: str, if not level: if subtree_level: pass - elif select_all or node in self._root.findall(path, namespaces): + elif select_all or \ + node in self._select_elements(token, self._xpath_root): yield node elif not subtree_level: continue @@ -1209,11 +1243,12 @@ def iterfind(self, path: str, if ancestors is not None and level < subtree_level: ancestors.pop() continue # pragma: no cover - elif select_all or node in self._root.findall(path, namespaces): + elif select_all or \ + node in self._select_elements(token, self._xpath_root): yield node del node[:] # delete children, keep attributes, text and tail. - self._xpath_root = None # A rebuild of XPath tree is needed + self._xpath_root = LazyElementNode(self._root) # reset XPath tree finally: if self._source is not resource: @@ -1221,51 +1256,14 @@ def iterfind(self, path: str, else: if ancestors is None: - selector = self._iterfind + selector = self._select_elements(token, self.xpath_root) else: - parent_map = self.parent_map - ancestors.clear() - - def selector(*args: Any, **kwargs: Any) -> Iterator[Any]: - assert ancestors is not None - for e in self._iterfind(*args, **kwargs): - if e is self._root: - ancestors.clear() - else: - _ancestors = [] - parent = e - try: - while True: - parent = parent_map[parent] - if parent is not None: - _ancestors.append(parent) - except KeyError: - pass - - if _ancestors: - ancestors.clear() - ancestors.extend(reversed(_ancestors)) - - yield e + selector = self._select_ancestors(token, self.xpath_root, ancestors) if not self._nsmap or nsmap is None: - yield from selector(path, namespaces) + yield from selector else: - _nsmap = None - for elem in selector(path, namespaces): - try: - if _nsmap is not self._nsmap[elem]: - _nsmap = self._nsmap[elem] - if isinstance(nsmap, list): - nsmap.clear() - nsmap.extend(_nsmap) - else: - for prefix, uri in _nsmap: - 
self._update_nsmap(nsmap, prefix, uri) - except KeyError: - pass - - yield elem + yield from self._track_nsmap(selector, nsmap) def find(self, path: str, namespaces: Optional[NamespacesType] = None, diff --git a/xmlschema/validators/assertions.py b/xmlschema/validators/assertions.py index 02c2efa0..0f6fe5cd 100644 --- a/xmlschema/validators/assertions.py +++ b/xmlschema/validators/assertions.py @@ -10,7 +10,7 @@ import threading from typing import TYPE_CHECKING, cast, Any, Dict, Iterator, Optional, Union from elementpath import ElementPathError, XPath2Parser, XPathContext, XPathToken, \ - SchemaElementNode, build_schema_node_tree + LazyElementNode, SchemaElementNode, build_schema_node_tree from ..names import XSD_ASSERT from ..aliases import ElementType, SchemaType, SchemaElementType, NamespacesType @@ -131,11 +131,15 @@ def __call__(self, elem: ElementType, variables = {'value': None if value is None else self.base_type.text_decode(value)} if source is not None: - context = XPathContext(source.root, namespaces=_namespaces, - item=elem, variables=variables) + context = XPathContext( + root=source.xpath_root, + namespaces=_namespaces, + item=source.get_xpath_node(elem), + variables=variables + ) else: # If validated from a component (could not work with rooted XPath expressions) - context = XPathContext(elem, variables=variables) + context = XPathContext(LazyElementNode(elem), variables=variables) try: if not self.token.evaluate(context): diff --git a/xmlschema/validators/elements.py b/xmlschema/validators/elements.py index def4b19e..0d8bd555 100644 --- a/xmlschema/validators/elements.py +++ b/xmlschema/validators/elements.py @@ -878,7 +878,7 @@ def iter_decode(self, obj: ElementType, validation: str = 'lax', **kwargs: Any) except KeyError: element_node = LazyElementNode(obj, nsmap=copy(namespaces)) else: - element_node = resource.get_xpath_node(obj, identity.fields) + element_node = resource.get_xpath_node(obj) fields = identity.get_fields(element_node, namespaces, 
decoders=decoders) except (XMLSchemaValueError, XMLSchemaTypeError) as err: diff --git a/xmlschema/validators/schemas.py b/xmlschema/validators/schemas.py index b52bce7c..cfd5be8a 100644 --- a/xmlschema/validators/schemas.py +++ b/xmlschema/validators/schemas.py @@ -1785,7 +1785,7 @@ def iter_errors(self, source: Union[XMLSourceType, XMLResource], if ancestors[k] is not prev_ancestors[k]: break - path_ = '/'.join(e.tag for e in ancestors) + '/ancestor-or-self::node()' + path_ = f"{'/'.join(e.tag for e in ancestors)}/ancestor-or-self::node()" xsd_ancestors = cast(List[XsdElement], schema.findall(path_, namespaces)[1:]) for e in xsd_ancestors[k:]: From 4086528a2231e026af25cdff31fd226e320dc1c2 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Mon, 11 Jul 2022 17:24:50 +0200 Subject: [PATCH 10/17] Include sample cases from issue #311 (fixed in elementpath>=3.0.0) --- .../issues/issue_311/correct_no_list.xml | 15 +++ .../issues/issue_311/incorrect_with_list.xml | 15 +++ .../issue_311/kPartModel_reduit_issue.xsd | 122 ++++++++++++++++++ tests/test_cases/testfiles | 6 +- 4 files changed, 156 insertions(+), 2 deletions(-) create mode 100644 tests/test_cases/issues/issue_311/correct_no_list.xml create mode 100644 tests/test_cases/issues/issue_311/incorrect_with_list.xml create mode 100644 tests/test_cases/issues/issue_311/kPartModel_reduit_issue.xsd diff --git a/tests/test_cases/issues/issue_311/correct_no_list.xml b/tests/test_cases/issues/issue_311/correct_no_list.xml new file mode 100644 index 00000000..1f67cd35 --- /dev/null +++ b/tests/test_cases/issues/issue_311/correct_no_list.xml @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/test_cases/issues/issue_311/incorrect_with_list.xml b/tests/test_cases/issues/issue_311/incorrect_with_list.xml new file mode 100644 index 00000000..9c75ae53 --- /dev/null +++ b/tests/test_cases/issues/issue_311/incorrect_with_list.xml @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + \ No newline 
at end of file diff --git a/tests/test_cases/issues/issue_311/kPartModel_reduit_issue.xsd b/tests/test_cases/issues/issue_311/kPartModel_reduit_issue.xsd new file mode 100644 index 00000000..1d0b406f --- /dev/null +++ b/tests/test_cases/issues/issue_311/kPartModel_reduit_issue.xsd @@ -0,0 +1,122 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/test_cases/testfiles b/tests/test_cases/testfiles index 6fb64474..be2f88fa 100644 --- a/tests/test_cases/testfiles +++ b/tests/test_cases/testfiles @@ -139,5 +139,7 @@ issues/issue_298/issue_298-1.xml -L 'http://xmlschema.test/ns' issue_298.xsd issues/issue_298/issue_298-2.xml -L 'http://xmlschema.test/ns' issue_298.xsd issues/issue_306/issue_306.xsd issues/issue_306/issue_306-alt.xsd -issues/issue_306/issue_306-valid.xml -issues/issue_306/issue_306-invalid.xml --errors=1 \ No newline at end of file +issues/issue_311/correct_no_list.xml --version=1.1 --validation-only \ + -L 'http://www.ludd21.com/kPartModel' kPartModel_reduit_issue.xsd +issues/issue_311/incorrect_with_list.xml --version=1.1 --validation-only \ + -L 'http://www.ludd21.com/kPartModel' kPartModel_reduit_issue.xsd From 74f200c16d03c3d3da1591e8d6d2aebdc6ce7b63 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Wed, 13 Jul 2022 22:22:28 +0200 Subject: [PATCH 11/17] Fix lazy XPath node creation and reset in XMLResource --- xmlschema/resources.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/xmlschema/resources.py b/xmlschema/resources.py index 14a81123..51d3c7f9 100644 --- a/xmlschema/resources.py +++ b/xmlschema/resources.py @@ -619,7 +619,9 @@ def _lazy_iterparse(self, resource: IO[AnyStr], nsmap: Optional[NsmapType] = Non if event == 'start': if not root_started: self._root = node - 
self._xpath_root = LazyElementNode(self._root) + self._xpath_root = LazyElementNode( + self._root, nsmap={k: v for k, v in _nsmap} + ) root_started = True if nsmap_update and isinstance(nsmap, dict): for prefix, uri in _nsmap: @@ -865,13 +867,15 @@ def xpath_root(self) -> Union[ElementNode, DocumentNode]: def get_xpath_node(self, elem: ElementType) -> ElementNode: """ - Returns an XPath node for the element. If the element does not belong to - the XML tree or if the resource is lazy, it returns a lazy element node. + Returns an XPath node for the element, fetching it from the XPath root node. + Returns a new lazy element node if the matching element node is not found. """ - if self._lazy: - return LazyElementNode(elem) + xpath_node = self.xpath_root.get_element_node(elem) + if xpath_node is not None: + return xpath_node + try: - return self.xpath_root.elements[elem] + return LazyElementNode(elem, nsmap=dict(self._nsmap[elem])) except KeyError: return LazyElementNode(elem) @@ -1155,6 +1159,10 @@ def iter_depth(self, mode: int = 1, nsmap: Optional[NsmapType] = None, yield node del node[:] # delete children, keep attributes, text and tail. + + # reset the whole XPath tree to let it still usable if other + # children are added to the root by ElementTree.iterparse(). + self._xpath_root.children.clear() finally: if self._source is not resource: resource.close() @@ -1248,7 +1256,7 @@ def iterfind(self, path: str, yield node del node[:] # delete children, keep attributes, text and tail. 
- self._xpath_root = LazyElementNode(self._root) # reset XPath tree + self._xpath_root.children.clear() # reset XPath tree finally: if self._source is not resource: From 517045a07fe0efe3d315e681805aa1c084d481a9 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Fri, 15 Jul 2022 21:24:56 +0200 Subject: [PATCH 12/17] Remove etree.py module, now in elementpath --- setup.py | 2 +- tests/check_etree_import.py | 54 ---- tests/test_all.py | 2 - tests/test_converters.py | 24 +- tests/test_documents.py | 2 +- tests/test_etree.py | 345 ------------------------- tests/test_etree_import.py | 107 -------- tests/test_helpers.py | 182 ++++++++++++- tests/test_resources.py | 12 +- tests/test_w3c_suite.py | 2 +- tests/test_wsdl.py | 4 +- tests/test_xpath.py | 3 +- tests/validation/test_decoding.py | 2 +- tests/validation/test_encoding.py | 5 +- tests/validation/test_validation.py | 2 +- tests/validators/test_complex_types.py | 12 +- tests/validators/test_exceptions.py | 2 +- tests/validators/test_notations.py | 2 +- tests/validators/test_particles.py | 2 +- tests/validators/test_schemas.py | 6 +- tests/validators/test_xsdbase.py | 2 +- tox.ini | 6 - xmlschema/__init__.py | 5 +- xmlschema/aliases.py | 2 +- xmlschema/cli.py | 3 +- xmlschema/converters/__init__.py | 4 +- xmlschema/converters/abdera.py | 3 +- xmlschema/converters/badgerfish.py | 3 +- xmlschema/converters/columnar.py | 3 +- xmlschema/converters/default.py | 30 ++- xmlschema/converters/jsonml.py | 3 +- xmlschema/converters/parker.py | 3 +- xmlschema/converters/unordered.py | 3 +- xmlschema/dataobjects.py | 4 +- xmlschema/documents.py | 3 +- xmlschema/etree.py | 225 ---------------- xmlschema/helpers.py | 2 +- xmlschema/resources.py | 2 +- xmlschema/testing/_builders.py | 7 +- xmlschema/testing/_case_class.py | 8 +- xmlschema/testing/_helpers.py | 7 +- xmlschema/validators/builtins.py | 61 +++-- xmlschema/validators/elements.py | 12 +- xmlschema/validators/exceptions.py | 2 +- xmlschema/validators/facets.py | 5 +- 
xmlschema/validators/groups.py | 5 +- xmlschema/validators/schemas.py | 20 +- xmlschema/validators/simple_types.py | 4 +- xmlschema/validators/xsdbase.py | 13 +- 49 files changed, 330 insertions(+), 892 deletions(-) delete mode 100755 tests/check_etree_import.py delete mode 100644 tests/test_etree.py delete mode 100644 tests/test_etree_import.py delete mode 100644 xmlschema/etree.py diff --git a/setup.py b/setup.py index d5e6017f..272018b5 100755 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ ] }, python_requires='>=3.7', - install_requires=['elementpath>=3.0.0, <4.0.0'], + install_requires=['elementpath>=2.0.0, <4.0.0'], extras_require={ 'codegen': ['elementpath>=3.0.0, <4.0.0', 'jinja2'], 'dev': ['tox', 'coverage', 'lxml', 'elementpath>=3.0.0, <4.0.0', diff --git a/tests/check_etree_import.py b/tests/check_etree_import.py deleted file mode 100755 index a8dab92d..00000000 --- a/tests/check_etree_import.py +++ /dev/null @@ -1,54 +0,0 @@ -# -# Copyright (c), 2016-2020, SISSA (International School for Advanced Studies). -# All rights reserved. -# This file is distributed under the terms of the MIT License. -# See the file 'LICENSE' in the root directory of the present -# distribution, or http://opensource.org/licenses/MIT. -# -# @author Davide Brunato -# -""" -Check ElementTree import with xmlschema. -""" -import argparse -import sys - -parser = argparse.ArgumentParser(add_help=True) -parser.add_argument( - '--before', action="store_true", default=False, - help="Import ElementTree before xmlschema. If not provided the ElementTree library " - "is loaded after xmlschema." 
-) -args = parser.parse_args() - -if args.before: - print("Importing ElementTree before xmlschema ...") - import xml.etree.ElementTree as ElementTree - import xmlschema.etree -else: - print("Importing ElementTree after xmlschema ...") - import xmlschema.etree - import xml.etree.ElementTree as ElementTree - -# Check if all modules are loaded in the system table -assert 'xml.etree.ElementTree' in sys.modules, "ElementTree not loaded!" -assert 'xmlschema' in sys.modules, 'xmlschema not loaded' -assert 'xmlschema.etree' in sys.modules, 'xmlschema.etree not loaded' -assert '_elementtree' in sys.modules, "cElementTree is not loaded!" - -# Check imported ElementTree -assert ElementTree._Element_Py is not ElementTree.Element, "ElementTree is pure Python!" -assert xmlschema.etree.ElementTree is ElementTree, \ - "xmlschema has a different ElementTree module!" -assert sys.modules['xml.etree'].ElementTree is ElementTree - -# Check ElementTree and pure Python ElementTree imported in xmlschema -PyElementTree = xmlschema.etree.PyElementTree -assert xmlschema.etree.ElementTree.Element is not xmlschema.etree.ElementTree._Element_Py, \ - "xmlschema's ElementTree is pure Python!" -assert PyElementTree.Element is PyElementTree._Element_Py, \ - "PyElementTree is not pure Python!" -assert xmlschema.etree.ElementTree is not PyElementTree, \ - "xmlschema ElementTree is PyElementTree!" 
- -print("\nTest OK: ElementTree import is working as expected!") diff --git a/tests/test_all.py b/tests/test_all.py index 8483cb02..93e0fd10 100644 --- a/tests/test_all.py +++ b/tests/test_all.py @@ -24,8 +24,6 @@ def load_tests(loader, tests, pattern): tests.addTests(loader.discover(start_dir=tests_dir, pattern=pattern)) return tests - tests.addTests(loader.discover(start_dir=tests_dir, pattern="test_etree.py")) - tests.addTests(loader.discover(start_dir=tests_dir, pattern="test_etree_import.py")) tests.addTests(loader.discover(start_dir=tests_dir, pattern="test_helpers.py")) tests.addTests(loader.discover(start_dir=tests_dir, pattern="test_namespaces.py")) tests.addTests(loader.discover(start_dir=tests_dir, pattern="test_resources.py")) diff --git a/tests/test_converters.py b/tests/test_converters.py index 5a47364e..a7956b67 100644 --- a/tests/test_converters.py +++ b/tests/test_converters.py @@ -9,8 +9,9 @@ # @author Davide Brunato # import unittest -import xml.etree.ElementTree as ElementTree +from xml.etree.ElementTree import Element, parse as etree_parse from pathlib import Path +from typing import cast, MutableMapping, Optional, Type try: import lxml.etree as lxml_etree @@ -18,7 +19,6 @@ lxml_etree = None from xmlschema import XMLSchema, XMLSchemaValidationError, fetch_namespaces -from xmlschema.etree import etree_element from xmlschema.dataobjects import DataElement from xmlschema.testing import etree_elements_assert_equal @@ -29,12 +29,16 @@ class TestConverters(unittest.TestCase): + col_xsd_filename: str + col_xml_filename: str + col_nsmap: MutableMapping[str, str] + col_lxml_root: Optional['lxml_etree.ElementTree'] @classmethod def setUpClass(cls): cls.col_xsd_filename = cls.casepath('examples/collection/collection.xsd') cls.col_xml_filename = cls.casepath('examples/collection/collection.xml') - cls.col_xml_root = ElementTree.parse(cls.col_xml_filename).getroot() + cls.col_xml_root = etree_parse(cls.col_xml_filename).getroot() cls.col_nsmap = 
fetch_namespaces(cls.col_xml_filename) cls.col_namespace = cls.col_nsmap['col'] @@ -49,13 +53,15 @@ def casepath(cls, relative_path): def test_element_class_argument(self): converter = XMLSchemaConverter() - self.assertIs(converter.etree_element_class, etree_element) + self.assertIs(converter.etree_element_class, Element) - converter = XMLSchemaConverter(etree_element_class=etree_element) - self.assertIs(converter.etree_element_class, etree_element) + converter = XMLSchemaConverter(etree_element_class=Element) + self.assertIs(converter.etree_element_class, Element) if lxml_etree is not None: - converter = XMLSchemaConverter(etree_element_class=lxml_etree.Element) + converter = XMLSchemaConverter( + etree_element_class=cast(Type[Element], lxml_etree.Element) + ) self.assertIs(converter.etree_element_class, lxml_etree.Element) def test_prefix_arguments(self): @@ -147,10 +153,10 @@ def test_preserve_root__issue_215(self): def test_etree_element_method(self): converter = XMLSchemaConverter() elem = converter.etree_element('A') - self.assertIsNone(etree_elements_assert_equal(elem, etree_element('A'))) + self.assertIsNone(etree_elements_assert_equal(elem, Element('A'))) elem = converter.etree_element('A', attrib={}) - self.assertIsNone(etree_elements_assert_equal(elem, etree_element('A'))) + self.assertIsNone(etree_elements_assert_equal(elem, Element('A'))) def test_columnar_converter(self): col_schema = XMLSchema(self.col_xsd_filename, converter=ColumnarConverter) diff --git a/tests/test_documents.py b/tests/test_documents.py index 14571768..35c28a76 100644 --- a/tests/test_documents.py +++ b/tests/test_documents.py @@ -16,6 +16,7 @@ import pathlib import tempfile from decimal import Decimal +from xml.etree import ElementTree try: import lxml.etree as lxml_etree @@ -26,7 +27,6 @@ XMLResourceError, XMLSchemaValidationError, XMLSchemaDecodeError, \ to_json, from_json -from xmlschema.etree import ElementTree from xmlschema.names import XSD_NAMESPACE, XSI_NAMESPACE from 
xmlschema.helpers import is_etree_element, is_etree_document from xmlschema.resources import XMLResource diff --git a/tests/test_etree.py b/tests/test_etree.py deleted file mode 100644 index 924b9b67..00000000 --- a/tests/test_etree.py +++ /dev/null @@ -1,345 +0,0 @@ -#!/usr/bin/env python -# -# Copyright (c), 2018-2020, SISSA (International School for Advanced Studies). -# All rights reserved. -# This file is distributed under the terms of the MIT License. -# See the file 'LICENSE' in the root directory of the present -# distribution, or http://opensource.org/licenses/MIT. -# -# @author Davide Brunato -# -import unittest -import os -import platform - -try: - import lxml.etree as lxml_etree -except ImportError: - lxml_etree = None - -from xmlschema.etree import ElementTree, PyElementTree, ParseError, \ - SafeXMLParser, etree_tostring -from xmlschema.helpers import etree_getpath, etree_iter_location_hints, \ - etree_iterpath, prune_etree -from xmlschema.testing import etree_elements_assert_equal - -TEST_CASES_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'test_cases/') - - -def casepath(relative_path): - return os.path.join(TEST_CASES_DIR, relative_path) - - -class TestElementTree(unittest.TestCase): - - def test_element_string_serialization(self): - self.assertRaises(TypeError, etree_tostring, '') - - elem = ElementTree.Element('element') - self.assertEqual(etree_tostring(elem), '') - self.assertEqual(etree_tostring(elem, xml_declaration=True), '') - - self.assertEqual(etree_tostring(elem, encoding='us-ascii'), b'') - self.assertEqual(etree_tostring(elem, encoding='us-ascii', indent=' '), - b' ') - self.assertEqual(etree_tostring(elem, encoding='us-ascii', xml_declaration=True), - b'\n') - - self.assertEqual(etree_tostring(elem, encoding='ascii'), - b"\n") - self.assertEqual(etree_tostring(elem, encoding='ascii', xml_declaration=False), - b'') - self.assertEqual(etree_tostring(elem, encoding='utf-8'), b'') - self.assertEqual(etree_tostring(elem, 
encoding='utf-8', xml_declaration=True), - b'\n') - - self.assertEqual(etree_tostring(elem, encoding='iso-8859-1'), - b"\n") - self.assertEqual(etree_tostring(elem, encoding='iso-8859-1', xml_declaration=False), - b"") - - self.assertEqual(etree_tostring(elem, method='html'), '') - self.assertEqual(etree_tostring(elem, method='text'), '') - - root = ElementTree.XML('\n' - ' text1\n' - ' text2\n' - '') - self.assertEqual(etree_tostring(root, method='text'), '\n text1\n text2') - - def test_py_element_string_serialization(self): - elem = PyElementTree.Element('element') - self.assertEqual(etree_tostring(elem), '') - self.assertEqual(etree_tostring(elem, xml_declaration=True), '') - - self.assertEqual(etree_tostring(elem, encoding='us-ascii'), b'') - self.assertEqual(etree_tostring(elem, encoding='us-ascii', xml_declaration=True), - b'\n') - - self.assertEqual(etree_tostring(elem, encoding='ascii'), - b"\n") - self.assertEqual(etree_tostring(elem, encoding='ascii', xml_declaration=False), - b'') - self.assertEqual(etree_tostring(elem, encoding='utf-8'), b'') - self.assertEqual(etree_tostring(elem, encoding='utf-8', xml_declaration=True), - b'\n') - - self.assertEqual(etree_tostring(elem, encoding='iso-8859-1'), - b"\n") - self.assertEqual(etree_tostring(elem, encoding='iso-8859-1', xml_declaration=False), - b"") - - self.assertEqual(etree_tostring(elem, method='html'), '') - self.assertEqual(etree_tostring(elem, method='text'), '') - - root = PyElementTree.XML('\n' - ' text1\n' - ' text2\n' - '') - self.assertEqual(etree_tostring(root, method='text'), '\n text1\n text2') - - @unittest.skipIf(lxml_etree is None, 'lxml is not installed ...') - def test_lxml_element_string_serialization(self): - elem = lxml_etree.Element('element') - self.assertEqual(etree_tostring(elem), '') - self.assertEqual(etree_tostring(elem, xml_declaration=True), '') - - self.assertEqual(etree_tostring(elem, encoding='us-ascii'), b'') - self.assertEqual(etree_tostring(elem, encoding='us-ascii', 
xml_declaration=True), - b'\n') - - self.assertEqual(etree_tostring(elem, encoding='ascii'), b'') - self.assertEqual(etree_tostring(elem, encoding='ascii', xml_declaration=True), - b'\n') - - self.assertEqual(etree_tostring(elem, encoding='utf-8'), b'') - self.assertEqual(etree_tostring(elem, encoding='utf-8', xml_declaration=True), - b'\n') - - self.assertEqual(etree_tostring(elem, encoding='iso-8859-1'), - b"\n") - self.assertEqual(etree_tostring(elem, encoding='iso-8859-1', xml_declaration=False), - b"") - - self.assertEqual(etree_tostring(elem, method='html'), '') - self.assertEqual(etree_tostring(elem, method='text'), '') - - root = lxml_etree.XML('\n' - ' text1\n' - ' text2\n' - '') - self.assertEqual(etree_tostring(root, method='text'), '\n text1\n text2') - - def test_defuse_xml_entities(self): - xml_file = casepath('resources/with_entity.xml') - - elem = ElementTree.parse(xml_file).getroot() - self.assertEqual(elem.text, 'abc') - - parser = SafeXMLParser(target=PyElementTree.TreeBuilder()) - with self.assertRaises(PyElementTree.ParseError) as ctx: - ElementTree.parse(xml_file, parser=parser) - self.assertEqual("Entities are forbidden (entity_name='e')", str(ctx.exception)) - - def test_defuse_xml_external_entities(self): - xml_file = casepath('resources/external_entity.xml') - - with self.assertRaises(ParseError) as ctx: - ElementTree.parse(xml_file) - self.assertIn("undefined entity &ee", str(ctx.exception)) - - parser = SafeXMLParser(target=PyElementTree.TreeBuilder()) - with self.assertRaises(PyElementTree.ParseError) as ctx: - ElementTree.parse(xml_file, parser=parser) - self.assertEqual("Entities are forbidden (entity_name='ee')", str(ctx.exception)) - - def test_defuse_xml_unused_external_entities(self): - xml_file = casepath('resources/unused_external_entity.xml') - - elem = ElementTree.parse(xml_file).getroot() - self.assertEqual(elem.text, 'abc') - - parser = SafeXMLParser(target=PyElementTree.TreeBuilder()) - with 
self.assertRaises(PyElementTree.ParseError) as ctx: - ElementTree.parse(xml_file, parser=parser) - self.assertEqual("Entities are forbidden (entity_name='ee')", str(ctx.exception)) - - def test_defuse_xml_unparsed_entities(self): - xml_file = casepath('resources/unparsed_entity.xml') - - parser = SafeXMLParser(target=PyElementTree.TreeBuilder()) - with self.assertRaises(PyElementTree.ParseError) as ctx: - ElementTree.parse(xml_file, parser=parser) - self.assertEqual("Unparsed entities are forbidden (entity_name='logo_file')", - str(ctx.exception)) - - def test_defuse_xml_unused_unparsed_entities(self): - xml_file = casepath('resources/unused_unparsed_entity.xml') - - elem = ElementTree.parse(xml_file).getroot() - self.assertIsNone(elem.text) - - parser = SafeXMLParser(target=PyElementTree.TreeBuilder()) - with self.assertRaises(PyElementTree.ParseError) as ctx: - ElementTree.parse(xml_file, parser=parser) - self.assertEqual("Unparsed entities are forbidden (entity_name='logo_file')", - str(ctx.exception)) - - def test_etree_iterpath(self): - root = ElementTree.XML('') - - items = list(etree_iterpath(root)) - self.assertListEqual(items, [ - (root, '.'), (root[0], './b1'), (root[0][0], './b1/c1'), - (root[0][1], './b1/c2'), (root[1], './b2'), (root[2], './b3'), - (root[2][0], './b3/c3') - ]) - - self.assertListEqual(items, list(etree_iterpath(root, tag='*'))) - self.assertListEqual(items, list(etree_iterpath(root, path=''))) - self.assertListEqual(items, list(etree_iterpath(root, path=None))) - - self.assertListEqual(list(etree_iterpath(root, path='/')), [ - (root, '/'), (root[0], '/b1'), (root[0][0], '/b1/c1'), - (root[0][1], '/b1/c2'), (root[1], '/b2'), (root[2], '/b3'), - (root[2][0], '/b3/c3') - ]) - - def test_etree_getpath(self): - root = ElementTree.XML('') - - self.assertEqual(etree_getpath(root, root), '.') - self.assertEqual(etree_getpath(root[0], root), './b1') - self.assertEqual(etree_getpath(root[2][0], root), './b3/c3') - 
self.assertEqual(etree_getpath(root[0], root, parent_path=True), '.') - self.assertEqual(etree_getpath(root[2][0], root, parent_path=True), './b3') - - self.assertIsNone(etree_getpath(root, root[0])) - self.assertIsNone(etree_getpath(root[0], root[1])) - self.assertIsNone(etree_getpath(root, root, parent_path=True)) - - def test_etree_elements_assert_equal(self): - e1 = ElementTree.XML('text\n\n') - e2 = ElementTree.XML('text\n\n') - - self.assertIsNone(etree_elements_assert_equal(e1, e1)) - self.assertIsNone(etree_elements_assert_equal(e1, e2)) - - if lxml_etree is not None: - e2 = lxml_etree.XML('text\n\n') - self.assertIsNone(etree_elements_assert_equal(e1, e2)) - - e2 = ElementTree.XML('text\n\n') - with self.assertRaises(AssertionError) as ctx: - etree_elements_assert_equal(e1, e2) - self.assertIn("has lesser children than text \n\n') - self.assertIsNone(etree_elements_assert_equal(e1, e2, strict=False)) - with self.assertRaises(AssertionError) as ctx: - etree_elements_assert_equal(e1, e2) - self.assertIn("texts differ: 'text' != 'text '", str(ctx.exception)) - - e2 = ElementTree.XML('text\ntext\n') - with self.assertRaises(AssertionError) as ctx: - etree_elements_assert_equal(e1, e2, strict=False) - self.assertIn("texts differ: None != 'text'", str(ctx.exception)) - - e2 = ElementTree.XML('text\n') - self.assertIsNone(etree_elements_assert_equal(e1, e2)) - - e2 = ElementTree.XML('text\n') - self.assertIsNone(etree_elements_assert_equal(e1, e2, strict=False)) - with self.assertRaises(AssertionError) as ctx: - etree_elements_assert_equal(e1, e2) - self.assertIn(r"tails differ: '\n' != None", str(ctx.exception)) - - e2 = ElementTree.XML('text\n\n') - self.assertIsNone(etree_elements_assert_equal(e1, e2, strict=False)) - with self.assertRaises(AssertionError) as ctx: - etree_elements_assert_equal(e1, e2) - self.assertIn("attributes differ: {'a': '1'} != {'a': '1 '}", str(ctx.exception)) - - e2 = ElementTree.XML('text\n\n') - with self.assertRaises(AssertionError) 
as ctx: - etree_elements_assert_equal(e1, e2, strict=False) - self.assertIn("attribute 'a' values differ: '1' != '2'", str(ctx.exception)) - - e2 = ElementTree.XML('text\n\n') - self.assertIsNone(etree_elements_assert_equal(e1, e2)) - self.assertIsNone(etree_elements_assert_equal(e1, e2, skip_comments=False)) - - if lxml_etree is not None: - e2 = lxml_etree.XML('text\n\n') - self.assertIsNone(etree_elements_assert_equal(e1, e2)) - - e1 = ElementTree.XML('+1') - e2 = ElementTree.XML('+ 1 ') - self.assertIsNone(etree_elements_assert_equal(e1, e2, strict=False)) - - e1 = ElementTree.XML('+1') - e2 = ElementTree.XML('+1.1 ') - - with self.assertRaises(AssertionError) as ctx: - etree_elements_assert_equal(e1, e2, strict=False) - self.assertIn("texts differ: '+1' != '+1.1 '", str(ctx.exception)) - - e1 = ElementTree.XML('1') - e2 = ElementTree.XML('true ') - self.assertIsNone(etree_elements_assert_equal(e1, e2, strict=False)) - self.assertIsNone(etree_elements_assert_equal(e2, e1, strict=False)) - - e2 = ElementTree.XML('false ') - with self.assertRaises(AssertionError) as ctx: - etree_elements_assert_equal(e1, e2, strict=False) - self.assertIn("texts differ: '1' != 'false '", str(ctx.exception)) - - e1 = ElementTree.XML(' 0') - self.assertIsNone(etree_elements_assert_equal(e1, e2, strict=False)) - self.assertIsNone(etree_elements_assert_equal(e2, e1, strict=False)) - - e2 = ElementTree.XML('true ') - with self.assertRaises(AssertionError) as ctx: - etree_elements_assert_equal(e1, e2, strict=False) - self.assertIn("texts differ: ' 0' != 'true '", str(ctx.exception)) - - e1 = ElementTree.XML('text\n\n') - e2 = ElementTree.XML('texttail\n\n') - - with self.assertRaises(AssertionError) as ctx: - etree_elements_assert_equal(e1, e2, strict=False) - self.assertIn("tails differ: None != 'tail'", str(ctx.exception)) - - def test_iter_location_hints(self): - elem = ElementTree.XML( - """""" - ) - self.assertListEqual( - list(etree_iter_location_hints(elem)), - 
[('http://example.com/xmlschema/ns-A', 'import-case4a.xsd')] - ) - elem = ElementTree.XML( - """""" - ) - self.assertListEqual( - list(etree_iter_location_hints(elem)), [('', 'schema.xsd')] - ) - - def test_prune_etree(self): - root = ElementTree.XML('') - prune_etree(root, selector=lambda x: x.tag == 'b1') - self.assertListEqual([e.tag for e in root.iter()], ['a', 'b2', 'b3', 'c3']) - - root = ElementTree.XML('') - prune_etree(root, selector=lambda x: x.tag.startswith('c')) - self.assertListEqual([e.tag for e in root.iter()], ['a', 'b1', 'b2', 'b3']) - - -if __name__ == '__main__': - header_template = "ElementTree tests for xmlschema with Python {} on {}" - header = header_template.format(platform.python_version(), platform.platform()) - print('{0}\n{1}\n{0}'.format("*" * len(header), header)) - - unittest.main() diff --git a/tests/test_etree_import.py b/tests/test_etree_import.py deleted file mode 100644 index 582e6e47..00000000 --- a/tests/test_etree_import.py +++ /dev/null @@ -1,107 +0,0 @@ -#!/usr/bin/env python -# -# Copyright (c), 2018-2020, SISSA (International School for Advanced Studies). -# All rights reserved. -# This file is distributed under the terms of the MIT License. -# See the file 'LICENSE' in the root directory of the present -# distribution, or http://opensource.org/licenses/MIT. -# -# @author Davide Brunato -# -import unittest -import os -import sys -import importlib -import subprocess -import platform - - -def is_element_tree_imported(): - return '_elementtree' in sys.modules or 'xml.etree.ElementTree' in sys.modules - - -@unittest.skipUnless(platform.python_implementation() == 'CPython', "requires CPython") -class TestElementTreeImport(unittest.TestCase): - """ - Test ElementTree imports using external script or with single-run import tests. 
- For running a single-run import test use one of these commands: - - python -m unittest tests/test_etree_import.py -k - python tests/test_etree_import.py -k - - The pattern must match only one test method to be effective, because the import - test can be executed once for each run. - - Example: - - python -m unittest tests/test_etree_import.py -k before - - """ - - @unittest.skipUnless(platform.system() == 'Linux', "requires Linux") - def test_element_tree_import_script(self): - test_dir = os.path.dirname(__file__) or '.' - - cmd = [sys.executable, os.path.join(test_dir, 'check_etree_import.py')] - process = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - - stderr = process.stderr.decode('utf-8') - self.assertTrue("ModuleNotFoundError" not in stderr, - msg="Test script fails because a package is missing:\n\n{}".format(stderr)) - - self.assertIn("\nTest OK:", process.stdout.decode('utf-8'), - msg="Wrong import of ElementTree after xmlschema:\n\n{}".format(stderr)) - - cmd.append('--before') - process = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - self.assertTrue("\nTest OK:" in process.stdout.decode('utf-8'), - msg="Wrong import of ElementTree before xmlschema:\n\n{}".format(stderr)) - - def test_import_etree_after(self): - if is_element_tree_imported(): - return # skip if ElementTree is already imported - - xmlschema_etree = importlib.import_module('xmlschema.etree') - ElementTree = importlib.import_module('xml.etree.ElementTree') - - self.assertIsNot(ElementTree.Element, ElementTree._Element_Py, - msg="cElementTree not available!") - elem = xmlschema_etree.PyElementTree.Element('element') - self.assertEqual(xmlschema_etree.etree_tostring(elem), '') - self.assertIs(importlib.import_module('xml.etree.ElementTree'), ElementTree) - self.assertIs(importlib.import_module('xml.etree').ElementTree, ElementTree) - self.assertIs(xmlschema_etree.ElementTree, ElementTree) - - def test_import_etree_before(self): - if 
is_element_tree_imported(): - return # skip if ElementTree is already imported - - ElementTree = importlib.import_module('xml.etree.ElementTree') - xmlschema_etree = importlib.import_module('xmlschema.etree') - - self.assertIsNot(ElementTree.Element, ElementTree._Element_Py, - msg="cElementTree not available!") - elem = xmlschema_etree.PyElementTree.Element('element') - self.assertEqual(xmlschema_etree.etree_tostring(elem), '') - self.assertIs(importlib.import_module('xml.etree.ElementTree'), ElementTree) - self.assertIs(importlib.import_module('xml.etree').ElementTree, ElementTree) - self.assertIs(xmlschema_etree.ElementTree, ElementTree) - - def test_inconsistent_etree(self): - if is_element_tree_imported(): - return # skip if ElementTree is already imported - - importlib.import_module('xml.etree.ElementTree') - sys.modules.pop('xml.etree.ElementTree') - - with self.assertRaises(RuntimeError) as ctx: - importlib.import_module('xmlschema') - self.assertIn('Inconsistent status for ElementTree module', str(ctx.exception)) - - -if __name__ == '__main__': - header_template = "ElementTree import tests for xmlschema with Python {} on {}" - header = header_template.format(platform.python_version(), platform.platform()) - print('{0}\n{1}\n{0}'.format("*" * len(header), header)) - - unittest.main() diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 3c372ba3..88be13a7 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -13,15 +13,21 @@ import sys import decimal from collections import OrderedDict +from xml.etree import ElementTree + +try: + import lxml.etree as lxml_etree +except ImportError: + lxml_etree = None from xmlschema import XMLSchema, XMLSchemaParseError -from xmlschema.etree import ElementTree, etree_element from xmlschema.names import XSD_NAMESPACE, XSI_NAMESPACE, XSD_SCHEMA, \ XSD_ELEMENT, XSD_SIMPLE_TYPE, XSD_ANNOTATION, XSI_TYPE from xmlschema.helpers import prune_etree, get_namespace, get_qname, \ local_name, get_prefixed_qname, 
get_extended_qname, raw_xml_encode, \ - count_digits, strictly_equal -from xmlschema.testing import iter_nested_items + count_digits, strictly_equal, etree_iterpath, etree_getpath, \ + etree_iter_location_hints +from xmlschema.testing import iter_nested_items, etree_elements_assert_equal from xmlschema.validators.exceptions import XMLSchemaValidationError from xmlschema.validators.helpers import get_xsd_derivation_attribute, \ decimal_validator, qname_validator, \ @@ -42,7 +48,7 @@ def tearDownClass(cls): XMLSchema.meta_schema.clear() def test_get_xsd_derivation_attribute(self): - elem = etree_element(XSD_ELEMENT, attrib={ + elem = ElementTree.Element(XSD_ELEMENT, attrib={ 'a1': 'extension', 'a2': ' restriction', 'a3': '#all', 'a4': 'other', 'a5': 'restriction extension restriction ', 'a6': 'other restriction' }) @@ -66,28 +72,28 @@ def test_get_xsd_derivation_attribute(self): def test_parse_component(self): component = XMLSchema.meta_schema.types['anyType'] - elem = etree_element(XSD_SCHEMA) + elem = ElementTree.Element(XSD_SCHEMA) self.assertIsNone(component._parse_child_component(elem)) - elem.append(etree_element(XSD_ELEMENT)) + elem.append(ElementTree.Element(XSD_ELEMENT)) self.assertEqual(component._parse_child_component(elem), elem[0]) - elem.append(etree_element(XSD_SIMPLE_TYPE)) + elem.append(ElementTree.Element(XSD_SIMPLE_TYPE)) self.assertRaises(XMLSchemaParseError, component._parse_child_component, elem) self.assertEqual(component._parse_child_component(elem, strict=False), elem[0]) elem.clear() - elem.append(etree_element(XSD_ANNOTATION)) + elem.append(ElementTree.Element(XSD_ANNOTATION)) self.assertIsNone(component._parse_child_component(elem)) - elem.append(etree_element(XSD_SIMPLE_TYPE)) + elem.append(ElementTree.Element(XSD_SIMPLE_TYPE)) self.assertEqual(component._parse_child_component(elem), elem[1]) - elem.append(etree_element(XSD_ELEMENT)) + elem.append(ElementTree.Element(XSD_ELEMENT)) self.assertRaises(XMLSchemaParseError, 
component._parse_child_component, elem) self.assertEqual(component._parse_child_component(elem, strict=False), elem[1]) elem.clear() - elem.append(etree_element(XSD_ANNOTATION)) - elem.append(etree_element(XSD_ANNOTATION)) + elem.append(ElementTree.Element(XSD_ANNOTATION)) + elem.append(ElementTree.Element(XSD_ANNOTATION)) self.assertIsNone(component._parse_child_component(elem, strict=False)) - elem.append(etree_element(XSD_SIMPLE_TYPE)) + elem.append(ElementTree.Element(XSD_SIMPLE_TYPE)) self.assertEqual(component._parse_child_component(elem), elem[2]) def test_raw_xml_encode_function(self): @@ -275,6 +281,148 @@ def test_get_extended_qname(self): namespaces = {'': XSD_NAMESPACE} self.assertEqual(get_extended_qname('element', namespaces), XSD_ELEMENT) + def test_etree_iterpath(self): + root = ElementTree.XML('') + + items = list(etree_iterpath(root)) + self.assertListEqual(items, [ + (root, '.'), (root[0], './b1'), (root[0][0], './b1/c1'), + (root[0][1], './b1/c2'), (root[1], './b2'), (root[2], './b3'), + (root[2][0], './b3/c3') + ]) + + self.assertListEqual(items, list(etree_iterpath(root, tag='*'))) + self.assertListEqual(items, list(etree_iterpath(root, path=''))) + self.assertListEqual(items, list(etree_iterpath(root, path=None))) + + self.assertListEqual(list(etree_iterpath(root, path='/')), [ + (root, '/'), (root[0], '/b1'), (root[0][0], '/b1/c1'), + (root[0][1], '/b1/c2'), (root[1], '/b2'), (root[2], '/b3'), + (root[2][0], '/b3/c3') + ]) + + def test_etree_getpath(self): + root = ElementTree.XML('') + + self.assertEqual(etree_getpath(root, root), '.') + self.assertEqual(etree_getpath(root[0], root), './b1') + self.assertEqual(etree_getpath(root[2][0], root), './b3/c3') + self.assertEqual(etree_getpath(root[0], root, parent_path=True), '.') + self.assertEqual(etree_getpath(root[2][0], root, parent_path=True), './b3') + + self.assertIsNone(etree_getpath(root, root[0])) + self.assertIsNone(etree_getpath(root[0], root[1])) + 
self.assertIsNone(etree_getpath(root, root, parent_path=True)) + + def test_etree_elements_assert_equal(self): + e1 = ElementTree.XML('text\n\n') + e2 = ElementTree.XML('text\n\n') + + self.assertIsNone(etree_elements_assert_equal(e1, e1)) + self.assertIsNone(etree_elements_assert_equal(e1, e2)) + + if lxml_etree is not None: + e2 = lxml_etree.XML('text\n\n') + self.assertIsNone(etree_elements_assert_equal(e1, e2)) + + e2 = ElementTree.XML('text\n\n') + with self.assertRaises(AssertionError) as ctx: + etree_elements_assert_equal(e1, e2) + self.assertIn("has lesser children than text \n\n') + self.assertIsNone(etree_elements_assert_equal(e1, e2, strict=False)) + with self.assertRaises(AssertionError) as ctx: + etree_elements_assert_equal(e1, e2) + self.assertIn("texts differ: 'text' != 'text '", str(ctx.exception)) + + e2 = ElementTree.XML('text\ntext\n') + with self.assertRaises(AssertionError) as ctx: + etree_elements_assert_equal(e1, e2, strict=False) + self.assertIn("texts differ: None != 'text'", str(ctx.exception)) + + e2 = ElementTree.XML('text\n') + self.assertIsNone(etree_elements_assert_equal(e1, e2)) + + e2 = ElementTree.XML('text\n') + self.assertIsNone(etree_elements_assert_equal(e1, e2, strict=False)) + with self.assertRaises(AssertionError) as ctx: + etree_elements_assert_equal(e1, e2) + self.assertIn(r"tails differ: '\n' != None", str(ctx.exception)) + + e2 = ElementTree.XML('text\n\n') + self.assertIsNone(etree_elements_assert_equal(e1, e2, strict=False)) + with self.assertRaises(AssertionError) as ctx: + etree_elements_assert_equal(e1, e2) + self.assertIn("attributes differ: {'a': '1'} != {'a': '1 '}", str(ctx.exception)) + + e2 = ElementTree.XML('text\n\n') + with self.assertRaises(AssertionError) as ctx: + etree_elements_assert_equal(e1, e2, strict=False) + self.assertIn("attribute 'a' values differ: '1' != '2'", str(ctx.exception)) + + e2 = ElementTree.XML('text\n\n') + self.assertIsNone(etree_elements_assert_equal(e1, e2)) + 
self.assertIsNone(etree_elements_assert_equal(e1, e2, skip_comments=False)) + + if lxml_etree is not None: + e2 = lxml_etree.XML('text\n\n') + self.assertIsNone(etree_elements_assert_equal(e1, e2)) + + e1 = ElementTree.XML('+1') + e2 = ElementTree.XML('+ 1 ') + self.assertIsNone(etree_elements_assert_equal(e1, e2, strict=False)) + + e1 = ElementTree.XML('+1') + e2 = ElementTree.XML('+1.1 ') + + with self.assertRaises(AssertionError) as ctx: + etree_elements_assert_equal(e1, e2, strict=False) + self.assertIn("texts differ: '+1' != '+1.1 '", str(ctx.exception)) + + e1 = ElementTree.XML('1') + e2 = ElementTree.XML('true ') + self.assertIsNone(etree_elements_assert_equal(e1, e2, strict=False)) + self.assertIsNone(etree_elements_assert_equal(e2, e1, strict=False)) + + e2 = ElementTree.XML('false ') + with self.assertRaises(AssertionError) as ctx: + etree_elements_assert_equal(e1, e2, strict=False) + self.assertIn("texts differ: '1' != 'false '", str(ctx.exception)) + + e1 = ElementTree.XML(' 0') + self.assertIsNone(etree_elements_assert_equal(e1, e2, strict=False)) + self.assertIsNone(etree_elements_assert_equal(e2, e1, strict=False)) + + e2 = ElementTree.XML('true ') + with self.assertRaises(AssertionError) as ctx: + etree_elements_assert_equal(e1, e2, strict=False) + self.assertIn("texts differ: ' 0' != 'true '", str(ctx.exception)) + + e1 = ElementTree.XML('text\n\n') + e2 = ElementTree.XML('texttail\n\n') + + with self.assertRaises(AssertionError) as ctx: + etree_elements_assert_equal(e1, e2, strict=False) + self.assertIn("tails differ: None != 'tail'", str(ctx.exception)) + + def test_iter_location_hints(self): + elem = ElementTree.XML( + """""" + ) + self.assertListEqual( + list(etree_iter_location_hints(elem)), + [('http://example.com/xmlschema/ns-A', 'import-case4a.xsd')] + ) + elem = ElementTree.XML( + """""" + ) + self.assertListEqual( + list(etree_iter_location_hints(elem)), [('', 'schema.xsd')] + ) + def test_prune_etree_function(self): root = 
ElementTree.XML('') self.assertFalse(prune_etree(root, lambda x: x.tag == 'C')) @@ -308,6 +456,14 @@ def method(self, elem): self.assertListEqual([e.tag for e in root.iter()], ['A']) self.assertEqual(root.attrib, {'id': '1'}) + root = ElementTree.XML('') + prune_etree(root, selector=lambda x: x.tag == 'b1') + self.assertListEqual([e.tag for e in root.iter()], ['a', 'b2', 'b3', 'c3']) + + root = ElementTree.XML('') + prune_etree(root, selector=lambda x: x.tag.startswith('c')) + self.assertListEqual([e.tag for e in root.iter()], ['a', 'b1', 'b2', 'b3']) + def test_decimal_validator(self): self.assertIsNone(decimal_validator(10)) self.assertIsNone(decimal_validator(10.1)) diff --git a/tests/test_resources.py b/tests/test_resources.py index eed38cfb..b31cc4d9 100644 --- a/tests/test_resources.py +++ b/tests/test_resources.py @@ -21,15 +21,17 @@ from urllib.parse import urlsplit, uses_relative from pathlib import Path, PurePath, PureWindowsPath, PurePosixPath from unittest.mock import patch, MagicMock +from xml.etree import ElementTree try: import lxml.etree as lxml_etree except ImportError: lxml_etree = None +from elementpath.etree import PyElementTree, is_etree_element + from xmlschema import fetch_namespaces, fetch_resource, normalize_url, \ fetch_schema, fetch_schema_locations, XMLResource, XMLResourceError, XMLSchema -from xmlschema.etree import ElementTree, etree_element, py_etree_element, is_etree_element from xmlschema.names import XSD_NAMESPACE import xmlschema.resources from xmlschema.resources import is_url, is_local_url, is_remote_url, \ @@ -790,9 +792,9 @@ def test_xml_resource_defuse(self): self.assertEqual(resource.defuse, 'never') self.assertRaises(ValueError, XMLResource, self.vh_xml_file, defuse='all') self.assertRaises(TypeError, XMLResource, self.vh_xml_file, defuse=None) - self.assertIsInstance(resource.root, etree_element) + self.assertIsInstance(resource.root, ElementTree.Element) resource = XMLResource(self.vh_xml_file, defuse='always', 
lazy=True) - self.assertIsInstance(resource.root, py_etree_element) + self.assertIsInstance(resource.root, PyElementTree.Element) xml_file = casepath('resources/with_entity.xml') self.assertIsInstance(XMLResource(xml_file, lazy=True), XMLResource) @@ -1371,9 +1373,9 @@ def test_remote_resource_loading(self): def test_schema_defuse(self): vh_schema = XMLSchema(self.vh_xsd_file, defuse='always') - self.assertIsInstance(vh_schema.root, etree_element) + self.assertIsInstance(vh_schema.root, ElementTree.Element) for schema in vh_schema.maps.iter_schemas(): - self.assertIsInstance(schema.root, etree_element) + self.assertIsInstance(schema.root, ElementTree.Element) def test_schema_resource_access(self): vh_schema = XMLSchema(self.vh_xsd_file, allow='sandbox') diff --git a/tests/test_w3c_suite.py b/tests/test_w3c_suite.py index aa6b5319..a87a9cb0 100644 --- a/tests/test_w3c_suite.py +++ b/tests/test_w3c_suite.py @@ -15,6 +15,7 @@ import argparse import os.path import warnings +from xml.etree import ElementTree try: import lxml.etree as lxml_etree @@ -22,7 +23,6 @@ lxml_etree = None from xmlschema import validate, XMLSchema10, XMLSchema11, XMLSchemaException -from xmlschema.etree import ElementTree TEST_SUITE_NAMESPACE = "http://www.w3.org/XML/2004/xml-schema-test-suite/" XLINK_NAMESPACE = "http://www.w3.org/1999/xlink" diff --git a/tests/test_wsdl.py b/tests/test_wsdl.py index 96c7b596..0498d45b 100644 --- a/tests/test_wsdl.py +++ b/tests/test_wsdl.py @@ -12,9 +12,9 @@ import unittest import pathlib +from xml.etree import ElementTree from xmlschema import XMLSchemaValidationError, XMLSchema10, XMLSchema11 -from xmlschema.etree import ElementTree, ParseError from xmlschema.extras.wsdl import WsdlParseError, WsdlComponent, WsdlMessage, \ WsdlPortType, WsdlOperation, WsdlBinding, WsdlService, Wsdl11Document, \ WsdlInput, SoapHeader @@ -319,7 +319,7 @@ def test_example4(self): def test_example5(self): original_example5_file = casepath('features/wsdl/wsdl11_example5.wsdl') - 
with self.assertRaises(ParseError): + with self.assertRaises(ElementTree.ParseError): Wsdl11Document(original_example5_file) example5_file = casepath('features/wsdl/wsdl11_example5_valid.wsdl') diff --git a/tests/test_xpath.py b/tests/test_xpath.py index 20082d53..0be5226a 100644 --- a/tests/test_xpath.py +++ b/tests/test_xpath.py @@ -13,11 +13,12 @@ import unittest import os import pathlib +from xml.etree import ElementTree + from elementpath import XPath1Parser, XPath2Parser, Selector from xmlschema import XMLSchema10, XMLSchema11 from xmlschema.names import XSD_NAMESPACE -from xmlschema.etree import ElementTree from xmlschema.xpath import XMLSchemaProxy, XPathElement from xmlschema.validators import XsdAtomic, XsdAtomicRestriction diff --git a/tests/validation/test_decoding.py b/tests/validation/test_decoding.py index 7432eb8e..9f1f93f5 100644 --- a/tests/validation/test_decoding.py +++ b/tests/validation/test_decoding.py @@ -16,6 +16,7 @@ import math from decimal import Decimal from collections.abc import MutableMapping, MutableSequence, Set +from xml.etree import ElementTree try: import lxml.etree as lxml_etree @@ -28,7 +29,6 @@ AbderaConverter, JsonMLConverter, ColumnarConverter from xmlschema.names import XSD_STRING -from xmlschema.etree import ElementTree from xmlschema.converters import UnorderedConverter from xmlschema.validators import XMLSchema11 from xmlschema.testing import XsdValidatorTestCase, etree_elements_assert_equal diff --git a/tests/validation/test_encoding.py b/tests/validation/test_encoding.py index 536f7ece..5f74d0bc 100644 --- a/tests/validation/test_encoding.py +++ b/tests/validation/test_encoding.py @@ -12,6 +12,7 @@ import os import unittest from textwrap import dedent +from xml.etree import ElementTree try: import lxml.etree as lxml_etree @@ -19,10 +20,10 @@ lxml_etree = None from elementpath import datatypes +from elementpath.etree import etree_tostring from xmlschema import XMLSchemaEncodeError, XMLSchemaValidationError from 
xmlschema.converters import UnorderedConverter, JsonMLConverter -from xmlschema.etree import etree_element, etree_tostring, ElementTree from xmlschema.helpers import local_name, is_etree_element from xmlschema.resources import XMLResource from xmlschema.validators.exceptions import XMLSchemaChildrenValidationError @@ -226,7 +227,7 @@ def test_union_types(self): self.check_encode(boolean_or_integer_or_string, "Venice ", u'Venice ') def test_simple_elements(self): - elem = etree_element('A') + elem = ElementTree.Element('A') elem.text = '89' self.check_encode(self.get_element('A', type='xs:string'), '89', elem) self.check_encode(self.get_element('A', type='xs:integer'), 89, elem) diff --git a/tests/validation/test_validation.py b/tests/validation/test_validation.py index 92d29444..c23064ee 100644 --- a/tests/validation/test_validation.py +++ b/tests/validation/test_validation.py @@ -13,6 +13,7 @@ import sys import decimal from textwrap import dedent +from xml.etree import ElementTree try: import lxml.etree as lxml_etree @@ -22,7 +23,6 @@ import xmlschema from xmlschema import XMLSchemaValidationError -from xmlschema.etree import ElementTree from xmlschema.validators import XMLSchema11 from xmlschema.testing import XsdValidatorTestCase diff --git a/tests/validators/test_complex_types.py b/tests/validators/test_complex_types.py index d6c149ab..213d55d8 100644 --- a/tests/validators/test_complex_types.py +++ b/tests/validators/test_complex_types.py @@ -10,9 +10,9 @@ # import unittest import warnings +from xml.etree.ElementTree import Element from xmlschema import XMLSchemaParseError, XMLSchemaModelError -from xmlschema.etree import etree_element from xmlschema.validators import XMLSchema11 from xmlschema.testing import XsdValidatorTestCase @@ -474,11 +474,11 @@ def test_complex_type_assertion(self): """) xsd_type = schema.types['intRange'] - xsd_type.decode(etree_element('a', attrib={'min': '10', 'max': '19'})) - self.assertTrue(xsd_type.is_valid(etree_element('a', 
attrib={'min': '10', 'max': '19'}))) - self.assertTrue(xsd_type.is_valid(etree_element('a', attrib={'min': '19', 'max': '19'}))) - self.assertFalse(xsd_type.is_valid(etree_element('a', attrib={'min': '25', 'max': '19'}))) - self.assertTrue(xsd_type.is_valid(etree_element('a', attrib={'min': '25', 'max': '100'}))) + xsd_type.decode(Element('a', attrib={'min': '10', 'max': '19'})) + self.assertTrue(xsd_type.is_valid(Element('a', attrib={'min': '10', 'max': '19'}))) + self.assertTrue(xsd_type.is_valid(Element('a', attrib={'min': '19', 'max': '19'}))) + self.assertFalse(xsd_type.is_valid(Element('a', attrib={'min': '25', 'max': '19'}))) + self.assertTrue(xsd_type.is_valid(Element('a', attrib={'min': '25', 'max': '100'}))) def test_sequence_extension(self): schema = self.schema_class(""" diff --git a/tests/validators/test_exceptions.py b/tests/validators/test_exceptions.py index 7e8f07ef..0174481f 100644 --- a/tests/validators/test_exceptions.py +++ b/tests/validators/test_exceptions.py @@ -11,6 +11,7 @@ import unittest import os import io +from xml.etree import ElementTree try: import lxml.etree as lxml_etree @@ -18,7 +19,6 @@ lxml_etree = None from xmlschema import XMLSchema, XMLResource -from xmlschema.etree import ElementTree from xmlschema.validators.exceptions import XMLSchemaValidatorError, \ XMLSchemaNotBuiltError, XMLSchemaModelDepthError, XMLSchemaValidationError, \ XMLSchemaChildrenValidationError diff --git a/tests/validators/test_notations.py b/tests/validators/test_notations.py index f6b3bdb9..0ca698c0 100644 --- a/tests/validators/test_notations.py +++ b/tests/validators/test_notations.py @@ -9,9 +9,9 @@ # @author Davide Brunato # import unittest +from xml.etree import ElementTree from xmlschema import XMLSchemaParseError -from xmlschema.etree import ElementTree from xmlschema.names import XSD_NOTATION from xmlschema.validators import XMLSchema10, XMLSchema11, XsdNotation diff --git a/tests/validators/test_particles.py b/tests/validators/test_particles.py 
index de738ddb..e945d956 100644 --- a/tests/validators/test_particles.py +++ b/tests/validators/test_particles.py @@ -10,9 +10,9 @@ # import os import unittest +from xml.etree import ElementTree from xmlschema import XMLSchema10, XMLSchemaParseError -from xmlschema.etree import ElementTree from xmlschema.validators.particles import ParticleMixin CASES_DIR = os.path.join(os.path.dirname(__file__), '../test_cases') diff --git a/tests/validators/test_schemas.py b/tests/validators/test_schemas.py index b4118970..0f050609 100644 --- a/tests/validators/test_schemas.py +++ b/tests/validators/test_schemas.py @@ -19,10 +19,10 @@ import os import re from textwrap import dedent +from xml.etree.ElementTree import Element from xmlschema import XMLSchemaParseError, XMLSchemaIncludeWarning, XMLSchemaImportWarning from xmlschema.names import XML_NAMESPACE, LOCATION_HINTS, SCHEMAS_DIR, XSD_ELEMENT, XSI_TYPE -from xmlschema.etree import etree_element from xmlschema.validators import XMLSchemaBase, XMLSchema10, XMLSchema11, \ XsdGlobals, Xsd11Attribute from xmlschema.testing import SKIP_REMOTE_TESTS, XsdValidatorTestCase @@ -316,9 +316,9 @@ def test_remote_schemas_loading(self): def test_schema_defuse(self): vh_schema = self.schema_class(self.vh_xsd_file, defuse='always') - self.assertIsInstance(vh_schema.root, etree_element) + self.assertIsInstance(vh_schema.root, Element) for schema in vh_schema.maps.iter_schemas(): - self.assertIsInstance(schema.root, etree_element) + self.assertIsInstance(schema.root, Element) def test_logging(self): self.schema_class(self.vh_xsd_file, loglevel=logging.ERROR) diff --git a/tests/validators/test_xsdbase.py b/tests/validators/test_xsdbase.py index 6103da28..1c60aac2 100644 --- a/tests/validators/test_xsdbase.py +++ b/tests/validators/test_xsdbase.py @@ -13,6 +13,7 @@ import platform import re from textwrap import dedent +from xml.etree import ElementTree try: import lxml.etree as lxml_etree @@ -22,7 +23,6 @@ from xmlschema.validators import 
XsdValidator, XsdComponent, XMLSchema10, XMLSchema11, \ XMLSchemaParseError, XMLSchemaValidationError, XsdAnnotation, XsdGroup, XsdSimpleType from xmlschema.names import XSD_NAMESPACE, XSD_ELEMENT, XSD_ANNOTATION, XSD_ANY_TYPE -from xmlschema.etree import ElementTree from xmlschema.dataobjects import DataElement CASES_DIR = os.path.join(os.path.dirname(__file__), '../test_cases') diff --git a/tox.ini b/tox.ini index 29e90694..5f6a32f7 100644 --- a/tox.ini +++ b/tox.ini @@ -15,9 +15,6 @@ deps = flake8: flake8 coverage: coverage commands = - python -m unittest tests/test_etree_import.py -k before - python -m unittest tests/test_etree_import.py -k after - python -m unittest tests/test_etree_import.py -k inconsistent python -m unittest whitelist_externals = make @@ -65,9 +62,6 @@ commands = [testenv:coverage] commands = coverage erase - coverage run -a -m unittest tests/test_etree_import.py -k before - coverage run -a -m unittest tests/test_etree_import.py -k after - coverage run -a -m unittest tests/test_etree_import.py -k inconsistent coverage run -a -m unittest coverage report -m diff --git a/xmlschema/__init__.py b/xmlschema/__init__.py index 8545f81d..ceee778a 100644 --- a/xmlschema/__init__.py +++ b/xmlschema/__init__.py @@ -7,14 +7,15 @@ # # @author Davide Brunato # +from elementpath.etree import etree_tostring + from . import limits from . 
import translation from .exceptions import XMLSchemaException, XMLResourceError, XMLSchemaNamespaceError -from .etree import ElementData, etree_tostring from .resources import normalize_url, normalize_locations, fetch_resource, \ fetch_namespaces, fetch_schema_locations, fetch_schema, XMLResource from .xpath import ElementPathMixin -from .converters import XMLSchemaConverter, \ +from .converters import ElementData, XMLSchemaConverter, \ UnorderedConverter, ParkerConverter, BadgerFishConverter, \ AbderaConverter, JsonMLConverter, ColumnarConverter from .dataobjects import DataElement, DataElementConverter, DataBindingConverter diff --git a/xmlschema/aliases.py b/xmlschema/aliases.py index 7087de3e..ab23373d 100644 --- a/xmlschema/aliases.py +++ b/xmlschema/aliases.py @@ -28,12 +28,12 @@ from pathlib import Path from decimal import Decimal from typing import Callable, Dict, List, IO, Iterator, MutableMapping, Tuple, Type, Union + from xml.etree import ElementTree from elementpath.datatypes import NormalizedString, QName, Float10, Integer, \ Time, Base64Binary, HexBinary, AnyURI, Duration from elementpath.datatypes.datetime import OrderedDateTime - from .etree import ElementTree from .resources import XMLResource from .converters import XMLSchemaConverter from .validators import XMLSchemaValidationError, XsdComponent, XMLSchemaBase, \ diff --git a/xmlschema/cli.py b/xmlschema/cli.py index 35fa41ae..85b55053 100644 --- a/xmlschema/cli.py +++ b/xmlschema/cli.py @@ -16,9 +16,8 @@ from urllib.error import URLError import xmlschema -from xmlschema import XMLSchema, XMLSchema11, iter_errors, to_json, from_json +from xmlschema import XMLSchema, XMLSchema11, iter_errors, to_json, from_json, etree_tostring from xmlschema.exceptions import XMLSchemaValueError -from xmlschema.etree import etree_tostring PROGRAM_NAME = os.path.basename(sys.argv[0]) diff --git a/xmlschema/converters/__init__.py b/xmlschema/converters/__init__.py index 5952f623..3dac3ed1 100644 --- 
a/xmlschema/converters/__init__.py +++ b/xmlschema/converters/__init__.py @@ -7,7 +7,7 @@ # # @author Davide Brunato # -from .default import XMLSchemaConverter +from .default import ElementData, XMLSchemaConverter from .unordered import UnorderedConverter from .parker import ParkerConverter from .badgerfish import BadgerFishConverter @@ -17,4 +17,4 @@ __all__ = ['XMLSchemaConverter', 'UnorderedConverter', 'ParkerConverter', 'BadgerFishConverter', 'AbderaConverter', 'JsonMLConverter', - 'ColumnarConverter'] + 'ColumnarConverter', 'ElementData'] diff --git a/xmlschema/converters/abdera.py b/xmlschema/converters/abdera.py index ab4e8c00..11f03f6e 100644 --- a/xmlschema/converters/abdera.py +++ b/xmlschema/converters/abdera.py @@ -11,9 +11,8 @@ from typing import TYPE_CHECKING, Any, Optional, List, Dict, Type, Union from ..exceptions import XMLSchemaValueError -from ..etree import ElementData from ..aliases import NamespacesType, BaseXsdType -from .default import XMLSchemaConverter +from .default import ElementData, XMLSchemaConverter if TYPE_CHECKING: from ..validators import XsdElement diff --git a/xmlschema/converters/badgerfish.py b/xmlschema/converters/badgerfish.py index 3036c928..42ae2282 100644 --- a/xmlschema/converters/badgerfish.py +++ b/xmlschema/converters/badgerfish.py @@ -10,9 +10,8 @@ from collections.abc import MutableMapping, MutableSequence from typing import TYPE_CHECKING, Any, Optional, List, Dict, Type, Union, Tuple -from ..etree import ElementData from ..aliases import NamespacesType, BaseXsdType -from .default import XMLSchemaConverter +from .default import ElementData, XMLSchemaConverter if TYPE_CHECKING: from ..validators import XsdElement diff --git a/xmlschema/converters/columnar.py b/xmlschema/converters/columnar.py index ed8b4268..3482d624 100644 --- a/xmlschema/converters/columnar.py +++ b/xmlschema/converters/columnar.py @@ -11,9 +11,8 @@ from typing import TYPE_CHECKING, Any, Optional, List, Dict, Type, Tuple from ..exceptions import 
XMLSchemaTypeError, XMLSchemaValueError -from ..etree import ElementData from ..aliases import NamespacesType, BaseXsdType -from .default import XMLSchemaConverter +from .default import ElementData, XMLSchemaConverter if TYPE_CHECKING: from ..validators import XsdElement diff --git a/xmlschema/converters/default.py b/xmlschema/converters/default.py index 2ebd8299..e2ea4fe6 100644 --- a/xmlschema/converters/default.py +++ b/xmlschema/converters/default.py @@ -7,20 +7,33 @@ # # @author Davide Brunato # +from collections import namedtuple from collections.abc import MutableMapping, MutableSequence from typing import TYPE_CHECKING, cast, Any, Dict, Iterator, Iterable, \ List, Optional, Type, Tuple, Union +from xml.etree.ElementTree import Element from ..exceptions import XMLSchemaTypeError from ..names import XSI_NAMESPACE -from ..etree import etree_element, ElementData -from ..aliases import NamespacesType, ElementType, BaseXsdType +from ..aliases import NamespacesType, BaseXsdType from ..namespaces import NamespaceMapper if TYPE_CHECKING: from ..validators import XsdElement +ElementData = namedtuple('ElementData', ['tag', 'text', 'content', 'attributes']) +""" +Namedtuple for Element data interchange between decoders and converters. +The field *tag* is a string containing the Element's tag, *text* can be `None` +or a string representing the Element's text, *content* can be `None`, a list +containing the Element's children or a dictionary containing element name to +list of element contents for the Element's children (used for unordered input +data), *attributes* can be `None` or a dictionary containing the Element's +attributes. +""" + + class XMLSchemaConverter(NamespaceMapper): """ Generic XML Schema based converter class. 
A converter is used to compose @@ -71,8 +84,7 @@ class XMLSchemaConverter(NamespaceMapper): dict: Type[Dict[str, Any]] = dict list: Type[List[Any]] = list - etree_element_class: Type[ElementType] - etree_element_class = etree_element + etree_element_class: Type[Element] = Element __slots__ = ('text_key', 'ns_prefix', 'attr_prefix', 'cdata_prefix', 'indent', 'preserve_root', 'force_dict', 'force_list') @@ -80,7 +92,7 @@ class XMLSchemaConverter(NamespaceMapper): def __init__(self, namespaces: Optional[NamespacesType] = None, dict_class: Optional[Type[Dict[str, Any]]] = None, list_class: Optional[Type[List[Any]]] = None, - etree_element_class: Optional[Type[ElementType]] = None, + etree_element_class: Optional[Type[Element]] = None, text_key: Optional[str] = '$', attr_prefix: Optional[str] = '@', cdata_prefix: Optional[str] = None, @@ -213,9 +225,9 @@ def map_content(self, content: Iterable[Tuple[str, Any, Any]]) \ def etree_element(self, tag: str, text: Optional[str] = None, - children: Optional[List[ElementType]] = None, + children: Optional[List[Element]] = None, attrib: Optional[Dict[str, str]] = None, - level: int = 0) -> ElementType: + level: int = 0) -> Element: """ Builds an ElementTree's Element using arguments and the element class and the indent spacing stored in the converter instance. @@ -227,7 +239,7 @@ def etree_element(self, tag: str, :param level: the level related to the encoding process (0 means the root). :return: an instance of the Element class is set for the converter instance. """ - if type(self.etree_element_class) is type(etree_element): + if type(self.etree_element_class) is type(Element): if attrib is None: elem = self.etree_element_class(tag) else: @@ -255,7 +267,7 @@ def element_decode(self, data: ElementData, xsd_element: 'XsdElement', Converts a decoded element data to a data structure. :param data: ElementData instance decoded from an Element node. - :param xsd_element: the `XsdElement` associated to decoded the data. 
+ :param xsd_element: the `XsdElement` associated to decode the data. :param xsd_type: optional XSD type for supporting dynamic type through \ *xsi:type* or xs:alternative. :param level: the level related to the decoding process (0 means the root). diff --git a/xmlschema/converters/jsonml.py b/xmlschema/converters/jsonml.py index c9871de9..7ca2be3f 100644 --- a/xmlschema/converters/jsonml.py +++ b/xmlschema/converters/jsonml.py @@ -11,9 +11,8 @@ from typing import TYPE_CHECKING, Any, Optional, List, Dict, Type from ..exceptions import XMLSchemaTypeError, XMLSchemaValueError -from ..etree import ElementData from ..aliases import NamespacesType, BaseXsdType -from .default import XMLSchemaConverter +from .default import ElementData, XMLSchemaConverter if TYPE_CHECKING: from ..validators import XsdElement diff --git a/xmlschema/converters/parker.py b/xmlschema/converters/parker.py index 40916807..e25d68fa 100644 --- a/xmlschema/converters/parker.py +++ b/xmlschema/converters/parker.py @@ -10,9 +10,8 @@ from collections.abc import MutableMapping, MutableSequence from typing import TYPE_CHECKING, Any, Optional, List, Dict, Type -from ..etree import ElementData from ..aliases import NamespacesType, BaseXsdType -from .default import XMLSchemaConverter +from .default import ElementData, XMLSchemaConverter if TYPE_CHECKING: from ..validators import XsdElement diff --git a/xmlschema/converters/unordered.py b/xmlschema/converters/unordered.py index 34ff50c4..0223fc0b 100644 --- a/xmlschema/converters/unordered.py +++ b/xmlschema/converters/unordered.py @@ -10,8 +10,7 @@ from collections.abc import MutableMapping, MutableSequence from typing import TYPE_CHECKING, cast, Any, Dict, Union -from ..etree import ElementData -from .default import XMLSchemaConverter +from .default import ElementData, XMLSchemaConverter if TYPE_CHECKING: from ..validators import XsdElement diff --git a/xmlschema/dataobjects.py b/xmlschema/dataobjects.py index ce8ac12b..692e8fa2 100644 --- 
a/xmlschema/dataobjects.py +++ b/xmlschema/dataobjects.py @@ -12,12 +12,12 @@ from typing import TYPE_CHECKING, cast, overload, Any, Dict, List, Iterator, \ Optional, Union, Tuple, Type, MutableMapping, MutableSequence from elementpath import XPathContext, XPath2Parser, build_node_tree, protocols +from elementpath.etree import etree_tostring from .exceptions import XMLSchemaAttributeError, XMLSchemaTypeError, XMLSchemaValueError -from .etree import ElementData, etree_tostring from .aliases import ElementType, XMLSourceType, NamespacesType, BaseXsdType, DecodeType from .helpers import get_namespace, get_prefixed_qname, local_name, raw_xml_encode -from .converters import XMLSchemaConverter +from .converters import ElementData, XMLSchemaConverter from .resources import XMLResource from . import validators diff --git a/xmlschema/documents.py b/xmlschema/documents.py index 6d9d9f54..3726fdb8 100644 --- a/xmlschema/documents.py +++ b/xmlschema/documents.py @@ -12,9 +12,10 @@ from typing import Any, Dict, List, Optional, Type, Union, Tuple, \ IO, BinaryIO, TextIO, Iterator +from elementpath.etree import ElementTree, etree_tostring + from .exceptions import XMLSchemaTypeError, XMLSchemaValueError, XMLResourceError from .names import XSD_NAMESPACE, XSI_TYPE -from .etree import ElementTree, etree_tostring from .aliases import ElementType, XMLSourceType, NamespacesType, LocationsType, \ LazyType, SchemaSourceType, ConverterType, DecodeType, EncodeType, \ JsonDecodeType diff --git a/xmlschema/etree.py b/xmlschema/etree.py deleted file mode 100644 index 19fb9068..00000000 --- a/xmlschema/etree.py +++ /dev/null @@ -1,225 +0,0 @@ -# -# Copyright (c), 2016-2020, SISSA (International School for Advanced Studies). -# All rights reserved. -# This file is distributed under the terms of the MIT License. -# See the file 'LICENSE' in the root directory of the present -# distribution, or http://opensource.org/licenses/MIT. 
-# -# @author Davide Brunato -# -""" -A unified setup module for ElementTree with a safe parser and helper functions. -""" -import sys -import re -from collections import namedtuple -from typing import Any, MutableMapping, Optional, Union - -from .exceptions import XMLSchemaTypeError - -_REGEX_NS_PREFIX = re.compile(r'ns\d+$') - -### -# Programmatic import of xml.etree.ElementTree -# -# In Python 3 the pure python implementation is overwritten by the C module API, -# so use a programmatic re-import to obtain the pure Python module, necessary for -# defining a safer XMLParser. -# -if '_elementtree' in sys.modules: - if 'xml.etree.ElementTree' not in sys.modules: - raise RuntimeError("Inconsistent status for ElementTree module: module " - "is missing but the C optimized version is imported.") - - import xml.etree.ElementTree as ElementTree - - # Temporary remove the loaded modules - sys.modules.pop('xml.etree.ElementTree') - _cmod = sys.modules.pop('_elementtree') - - # Load the pure Python module - sys.modules['_elementtree'] = None # type: ignore[assignment] - import xml.etree.ElementTree as PyElementTree - import xml.etree - - # Restore original modules - sys.modules['_elementtree'] = _cmod - xml.etree.ElementTree = ElementTree - sys.modules['xml.etree.ElementTree'] = ElementTree - -else: - # Load the pure Python module - sys.modules['_elementtree'] = None # type: ignore[assignment] - import xml.etree.ElementTree as PyElementTree - - # Remove the pure Python module from imported modules - del sys.modules['xml.etree'] - del sys.modules['xml.etree.ElementTree'] - del sys.modules['_elementtree'] - - # Load the C optimized ElementTree module - import xml.etree.ElementTree as ElementTree - - -etree_element = ElementTree.Element -ParseError = ElementTree.ParseError -py_etree_element = PyElementTree.Element - - -class SafeXMLParser(PyElementTree.XMLParser): - """ - An XMLParser that forbids entities processing. 
Drops the *html* argument - that is deprecated since version 3.4. - - :param target: the target object called by the `feed()` method of the \ - parser, that defaults to `TreeBuilder`. - :param encoding: if provided, its value overrides the encoding specified \ - in the XML file. - """ - def __init__(self, target: Optional[Any] = None, encoding: Optional[str] = None) -> None: - super(SafeXMLParser, self).__init__(target=target, encoding=encoding) - self.parser.EntityDeclHandler = self.entity_declaration - self.parser.UnparsedEntityDeclHandler = self.unparsed_entity_declaration - self.parser.ExternalEntityRefHandler = self.external_entity_reference - - def entity_declaration(self, entity_name, is_parameter_entity, value, base, # type: ignore - system_id, public_id, notation_name): - raise PyElementTree.ParseError( - "Entities are forbidden (entity_name={!r})".format(entity_name) - ) - - def unparsed_entity_declaration(self, entity_name, base, system_id, # type: ignore - public_id, notation_name): - raise PyElementTree.ParseError( - "Unparsed entities are forbidden (entity_name={!r})".format(entity_name) - ) - - def external_entity_reference(self, context, base, system_id, public_id): # type: ignore - raise PyElementTree.ParseError( - "External references are forbidden (system_id={!r}, " - "public_id={!r})".format(system_id, public_id) - ) # pragma: no cover (EntityDeclHandler is called before) - - -ElementData = namedtuple('ElementData', ['tag', 'text', 'content', 'attributes']) -""" -Namedtuple for Element data interchange between decoders and converters. -The field *tag* is a string containing the Element's tag, *text* can be `None` -or a string representing the Element's text, *content* can be `None`, a list -containing the Element's children or a dictionary containing element name to -list of element contents for the Element's children (used for unordered input -data), *attributes* can be `None` or a dictionary containing the Element's -attributes. 
-""" - - -def is_etree_element(obj: Any) -> bool: - """A checker for valid ElementTree elements that excludes XsdElement objects.""" - return hasattr(obj, 'append') and hasattr(obj, 'tag') and hasattr(obj, 'attrib') - - -def etree_tostring(elem: etree_element, - namespaces: Optional[MutableMapping[str, str]] = None, - indent: str = '', - max_lines: Optional[int] = None, - spaces_for_tab: Optional[int] = None, - xml_declaration: Optional[bool] = None, - encoding: str = 'unicode', - method: str = 'xml') -> Union[str, bytes]: - """ - Serialize an Element tree to a string. Tab characters are replaced by whitespaces. - - :param elem: the Element instance. - :param namespaces: is an optional mapping from namespace prefix to URI. \ - Provided namespaces are registered before serialization. - :param indent: the base line indentation. - :param max_lines: if truncate serialization after a number of lines \ - (default: do not truncate). - :param spaces_for_tab: number of spaces for replacing tab characters. \ - For default tabs are replaced with 4 spaces, but only if not empty \ - indentation or a max lines limit are provided. - :param xml_declaration: if set to `True` inserts the XML declaration at the head. - :param encoding: if "unicode" (the default) the output is a string, otherwise it’s binary. - :param method: is either "xml" (the default), "html" or "text". - :return: a Unicode string. 
- """ - def reindent(line: str) -> str: - if not line: - return line - elif line.startswith(min_indent): - return line[start:] if start >= 0 else indent[start:] + line - else: - return indent + line - - etree_module: Any - if not is_etree_element(elem): - raise XMLSchemaTypeError("{!r} is not an Element".format(elem)) - - elif isinstance(elem, py_etree_element): - etree_module = PyElementTree - elif not hasattr(elem, 'nsmap'): - etree_module = ElementTree - else: - import lxml.etree as etree_module # type: ignore[no-redef] - - if namespaces: - default_namespace = namespaces.get('') - for prefix, uri in namespaces.items(): - if prefix and not _REGEX_NS_PREFIX.match(prefix): - etree_module.register_namespace(prefix, uri) - if uri == default_namespace: - default_namespace = None - - if default_namespace and not hasattr(elem, 'nsmap'): - etree_module.register_namespace('', default_namespace) - - xml_text = etree_module.tostring(elem, encoding=encoding, method=method) - if isinstance(xml_text, bytes): - xml_text = xml_text.decode('utf-8') - - if spaces_for_tab: - xml_text = xml_text.replace('\t', ' ' * spaces_for_tab) - elif method != 'text' and (indent or max_lines): - xml_text = xml_text.replace('\t', ' ' * 4) - - if xml_text.startswith(''.format(encoding)] - lines.extend(xml_text.splitlines()) - else: - lines = xml_text.splitlines() - - # Clear ending empty lines - while lines and not lines[-1].strip(): - lines.pop(-1) - - if not lines or method == 'text' or (not indent and not max_lines): - if encoding == 'unicode': - return '\n'.join(lines) - return '\n'.join(lines).encode(encoding) - - last_indent = ' ' * min(k for k in range(len(lines[-1])) if lines[-1][k] != ' ') - if len(lines) > 2: - child_indent = ' ' * min( - k for line in lines[1:-1] for k in range(len(line)) if line[k] != ' ' - ) - min_indent = min(child_indent, last_indent) - else: - min_indent = child_indent = last_indent - - start = len(min_indent) - len(indent) - - if max_lines is not None and 
len(lines) > max_lines + 2: - lines = lines[:max_lines] + [child_indent + '...'] * 2 + lines[-1:] - - if encoding == 'unicode': - return '\n'.join(reindent(line) for line in lines) - return '\n'.join(reindent(line) for line in lines).encode(encoding) - - -__all__ = ['ElementTree', 'PyElementTree', 'ParseError', 'SafeXMLParser', 'etree_element', - 'py_etree_element', 'ElementData', 'is_etree_element', 'etree_tostring'] diff --git a/xmlschema/helpers.py b/xmlschema/helpers.py index b48f6ff6..f9dd713e 100644 --- a/xmlschema/helpers.py +++ b/xmlschema/helpers.py @@ -249,7 +249,7 @@ def etree_iter_location_hints(elem: ElementType) -> Iterator[Tuple[Any, Any]]: def prune_etree(root: ElementType, selector: Callable[[ElementType], bool]) \ -> Optional[bool]: """ - Removes from an tree structure the elements that verify the selector + Removes from a tree structure the elements that verify the selector function. The checking and eventual removals are performed using a breadth-first visit method. diff --git a/xmlschema/resources.py b/xmlschema/resources.py index 51d3c7f9..c1b19d46 100644 --- a/xmlschema/resources.py +++ b/xmlschema/resources.py @@ -22,10 +22,10 @@ from elementpath import XPathToken, XPathContext, XPath2Parser, ElementNode, \ LazyElementNode, DocumentNode, build_lxml_node_tree, build_node_tree +from elementpath.etree import ElementTree, PyElementTree, SafeXMLParser, etree_tostring from .exceptions import XMLSchemaTypeError, XMLSchemaValueError, XMLResourceError from .names import XML_NAMESPACE -from .etree import ElementTree, PyElementTree, SafeXMLParser, etree_tostring from .aliases import ElementType, ElementTreeType, NamespacesType, XMLSourceType, \ NormalizedLocationsType, LocationsType, NsmapType, ParentMapType from .helpers import get_namespace, is_etree_document, etree_iter_location_hints diff --git a/xmlschema/testing/_builders.py b/xmlschema/testing/_builders.py index 49bd0774..d628c7f2 100644 --- a/xmlschema/testing/_builders.py +++ 
b/xmlschema/testing/_builders.py @@ -17,6 +17,7 @@ import importlib import tempfile import warnings +from xml.etree import ElementTree try: import lxml.etree as lxml_etree @@ -26,7 +27,7 @@ else: lxml_etree_element = lxml_etree.Element -from elementpath import XPathSchemaContext +from elementpath.etree import PyElementTree, etree_tostring import xmlschema from xmlschema import XMLSchemaBase, XMLSchema11, XMLSchemaValidationError, \ @@ -34,8 +35,6 @@ AbderaConverter, JsonMLConverter, ColumnarConverter from xmlschema.names import XSD_IMPORT from xmlschema.helpers import local_name -from xmlschema.etree import etree_tostring, ElementTree, \ - py_etree_element from xmlschema.resources import fetch_namespaces from xmlschema.validators import XsdType, Xsd11ComplexType from xmlschema.dataobjects import DataElementConverter, DataBindingConverter, DataElement @@ -114,7 +113,7 @@ def check_xsd_file(self): # are built with the SafeXMLParser that uses pure Python elements. for e in schema.maps.iter_components(): elem = getattr(e, 'elem', getattr(e, 'root', None)) - if isinstance(elem, py_etree_element): + if isinstance(elem, PyElementTree.Element): break else: raise diff --git a/xmlschema/testing/_case_class.py b/xmlschema/testing/_case_class.py index 45d4c747..bba40769 100644 --- a/xmlschema/testing/_case_class.py +++ b/xmlschema/testing/_case_class.py @@ -15,11 +15,13 @@ import re import os from textwrap import dedent +from xml.etree.ElementTree import Element, iselement + +from elementpath.etree import is_etree_element from xmlschema.exceptions import XMLSchemaValueError from xmlschema.names import XSD_NAMESPACE, XSI_NAMESPACE, XSD_SCHEMA from xmlschema.helpers import get_namespace -from xmlschema.etree import is_etree_element, etree_element from xmlschema.resources import fetch_namespaces from xmlschema.validators import XMLSchema10 from ._helpers import etree_elements_assert_equal @@ -86,7 +88,7 @@ def get_schema_source(self, source): :param source: A string or an 
ElementTree's Element. :return: An schema source string, an ElementTree's Element or a full pathname. """ - if is_etree_element(source): + if iselement(source): if source.tag in (XSD_SCHEMA, 'schema'): return source elif get_namespace(source.tag): @@ -95,7 +97,7 @@ def get_schema_source(self, source): 'group', 'attributeGroup', 'notation'}: raise XMLSchemaValueError("% is not an XSD global definition/declaration." % source) - root = etree_element('schema', attrib={ + root = Element('schema', attrib={ 'xmlns:xs': XSD_NAMESPACE, 'xmlns:xsi': XSI_NAMESPACE, 'elementFormDefault': "qualified", diff --git a/xmlschema/testing/_helpers.py b/xmlschema/testing/_helpers.py index 1e4c244b..6d329d32 100644 --- a/xmlschema/testing/_helpers.py +++ b/xmlschema/testing/_helpers.py @@ -9,8 +9,9 @@ # import re from typing import Any, Dict, List, Type, Union, Iterator +from xml.etree.ElementTree import Element + from ..helpers import get_namespace, get_qname -from ..etree import etree_element _REGEX_SPACES = re.compile(r'\s+') @@ -33,7 +34,7 @@ def iter_nested_items(items: Union[Dict[Any, Any], List[Any]], yield items -def etree_elements_assert_equal(elem: etree_element, other: etree_element, +def etree_elements_assert_equal(elem: Element, other: Element, strict: bool = True, skip_comments: bool = True, unordered: bool = False) -> None: """ @@ -46,7 +47,7 @@ def etree_elements_assert_equal(elem: etree_element, other: etree_element, :param unordered: children may have different order. :raise: an AssertionError containing information about first difference encountered. 
""" - children: Union[etree_element, List[etree_element]] + children: Union[Element, List[Element]] if unordered: children = sorted(elem, key=lambda x: '' if callable(x.tag) else x.tag) diff --git a/xmlschema/validators/builtins.py b/xmlschema/validators/builtins.py index 191173ed..9785b0cc 100644 --- a/xmlschema/validators/builtins.py +++ b/xmlschema/validators/builtins.py @@ -16,6 +16,7 @@ from decimal import Decimal from elementpath import datatypes from typing import cast, Any, Dict, Optional, Type, Tuple, Union +from xml.etree.ElementTree import Element from ..exceptions import XMLSchemaValueError from ..names import XSD_LENGTH, XSD_MIN_LENGTH, XSD_MAX_LENGTH, XSD_ENUMERATION, \ @@ -31,7 +32,6 @@ XSD_DURATION, XSD_DAY_TIME_DURATION, XSD_YEAR_MONTH_DURATION, XSD_BASE64_BINARY, \ XSD_HEX_BINARY, XSD_NOTATION_TYPE, XSD_ERROR, XSD_ASSERTION, XSD_SIMPLE_TYPE, \ XSD_ANY_TYPE, XSD_ANY_ATOMIC_TYPE, XSD_ANY_SIMPLE_TYPE -from ..etree import etree_element from ..aliases import ElementType, SchemaType, BaseXsdType from .helpers import decimal_validator, qname_validator, byte_validator, \ @@ -69,16 +69,15 @@ XSD_MIN_EXCLUSIVE, XSD_ASSERTION, XSD_EXPLICIT_TIMEZONE } - # # Element facets instances for builtin types. 
-PRESERVE_WHITE_SPACE_ELEMENT = etree_element(XSD_WHITE_SPACE, value='preserve') -COLLAPSE_WHITE_SPACE_ELEMENT = etree_element(XSD_WHITE_SPACE, value='collapse') -REPLACE_WHITE_SPACE_ELEMENT = etree_element(XSD_WHITE_SPACE, value='replace') -XSD10_FLOAT_PATTERN_ELEMENT = etree_element( +PRESERVE_WHITE_SPACE_ELEMENT = Element(XSD_WHITE_SPACE, value='preserve') +COLLAPSE_WHITE_SPACE_ELEMENT = Element(XSD_WHITE_SPACE, value='collapse') +REPLACE_WHITE_SPACE_ELEMENT = Element(XSD_WHITE_SPACE, value='replace') +XSD10_FLOAT_PATTERN_ELEMENT = Element( XSD_PATTERN, value=r"(\+|-)?([0-9]+(\.[0-9]*)?|\.[0-9]+)([Ee](\+|-)?[0-9]+)?|INF|-INF|NaN" ) -XSD11_FLOAT_PATTERN_ELEMENT = etree_element( +XSD11_FLOAT_PATTERN_ELEMENT = Element( XSD_PATTERN, value=r"(\+|-)?([0-9]+(\.[0-9]*)?|\.[0-9]+)([Ee](\+|-)?[0-9]+)?|(\+|-)?INF|NaN" ) @@ -203,19 +202,19 @@ 'name': XSD_LANGUAGE, 'python_type': str, 'base_type': XSD_TOKEN, - 'facets': [etree_element(XSD_PATTERN, value=r"[a-zA-Z]{1,8}(-[a-zA-Z0-9]{1,8})*")] + 'facets': [Element(XSD_PATTERN, value=r"[a-zA-Z]{1,8}(-[a-zA-Z0-9]{1,8})*")] }, # language codes { 'name': XSD_NAME, 'python_type': str, 'base_type': XSD_TOKEN, - 'facets': [etree_element(XSD_PATTERN, value=r"\i\c*")] + 'facets': [Element(XSD_PATTERN, value=r"\i\c*")] }, # not starting with a digit { 'name': XSD_NCNAME, 'python_type': str, 'base_type': XSD_NAME, - 'facets': [etree_element(XSD_PATTERN, value=r"[\i-[:]][\c-[:]]*")] + 'facets': [Element(XSD_PATTERN, value=r"[\i-[:]][\c-[:]]*")] }, # cannot contain colons { 'name': XSD_ID, @@ -236,7 +235,7 @@ 'name': XSD_NMTOKEN, 'python_type': str, 'base_type': XSD_TOKEN, - 'facets': [etree_element(XSD_PATTERN, value=r"\c+")] + 'facets': [Element(XSD_PATTERN, value=r"\c+")] }, # should not contain whitespace (attribute only) # --- Numerical derived types --- @@ -250,81 +249,81 @@ 'python_type': int, 'base_type': XSD_INTEGER, 'facets': [long_validator, - etree_element(XSD_MIN_INCLUSIVE, value='-9223372036854775808'), - 
etree_element(XSD_MAX_INCLUSIVE, value='9223372036854775807')] + Element(XSD_MIN_INCLUSIVE, value='-9223372036854775808'), + Element(XSD_MAX_INCLUSIVE, value='9223372036854775807')] }, # signed 128 bit value { 'name': XSD_INT, 'python_type': int, 'base_type': XSD_LONG, 'facets': [int_validator, - etree_element(XSD_MIN_INCLUSIVE, value='-2147483648'), - etree_element(XSD_MAX_INCLUSIVE, value='2147483647')] + Element(XSD_MIN_INCLUSIVE, value='-2147483648'), + Element(XSD_MAX_INCLUSIVE, value='2147483647')] }, # signed 64 bit value { 'name': XSD_SHORT, 'python_type': int, 'base_type': XSD_INT, 'facets': [short_validator, - etree_element(XSD_MIN_INCLUSIVE, value='-32768'), - etree_element(XSD_MAX_INCLUSIVE, value='32767')] + Element(XSD_MIN_INCLUSIVE, value='-32768'), + Element(XSD_MAX_INCLUSIVE, value='32767')] }, # signed 32 bit value { 'name': XSD_BYTE, 'python_type': int, 'base_type': XSD_SHORT, 'facets': [byte_validator, - etree_element(XSD_MIN_INCLUSIVE, value='-128'), - etree_element(XSD_MAX_INCLUSIVE, value='127')] + Element(XSD_MIN_INCLUSIVE, value='-128'), + Element(XSD_MAX_INCLUSIVE, value='127')] }, # signed 8 bit value { 'name': XSD_NON_NEGATIVE_INTEGER, 'python_type': int, 'base_type': XSD_INTEGER, - 'facets': [non_negative_int_validator, etree_element(XSD_MIN_INCLUSIVE, value='0')] + 'facets': [non_negative_int_validator, Element(XSD_MIN_INCLUSIVE, value='0')] }, # only zero and more value allowed [>= 0] { 'name': XSD_POSITIVE_INTEGER, 'python_type': int, 'base_type': XSD_NON_NEGATIVE_INTEGER, - 'facets': [positive_int_validator, etree_element(XSD_MIN_INCLUSIVE, value='1')] + 'facets': [positive_int_validator, Element(XSD_MIN_INCLUSIVE, value='1')] }, # only positive value allowed [> 0] { 'name': XSD_UNSIGNED_LONG, 'python_type': int, 'base_type': XSD_NON_NEGATIVE_INTEGER, 'facets': [unsigned_long_validator, - etree_element(XSD_MAX_INCLUSIVE, value='18446744073709551615')] + Element(XSD_MAX_INCLUSIVE, value='18446744073709551615')] }, # unsigned 128 bit 
value { 'name': XSD_UNSIGNED_INT, 'python_type': int, 'base_type': XSD_UNSIGNED_LONG, - 'facets': [unsigned_int_validator, etree_element(XSD_MAX_INCLUSIVE, value='4294967295')] + 'facets': [unsigned_int_validator, Element(XSD_MAX_INCLUSIVE, value='4294967295')] }, # unsigned 64 bit value { 'name': XSD_UNSIGNED_SHORT, 'python_type': int, 'base_type': XSD_UNSIGNED_INT, - 'facets': [unsigned_short_validator, etree_element(XSD_MAX_INCLUSIVE, value='65535')] + 'facets': [unsigned_short_validator, Element(XSD_MAX_INCLUSIVE, value='65535')] }, # unsigned 32 bit value { 'name': XSD_UNSIGNED_BYTE, 'python_type': int, 'base_type': XSD_UNSIGNED_SHORT, - 'facets': [unsigned_byte_validator, etree_element(XSD_MAX_INCLUSIVE, value='255')] + 'facets': [unsigned_byte_validator, Element(XSD_MAX_INCLUSIVE, value='255')] }, # unsigned 8 bit value { 'name': XSD_NON_POSITIVE_INTEGER, 'python_type': int, 'base_type': XSD_INTEGER, - 'facets': [non_positive_int_validator, etree_element(XSD_MAX_INCLUSIVE, value='0')] + 'facets': [non_positive_int_validator, Element(XSD_MAX_INCLUSIVE, value='0')] }, # only zero and smaller value allowed [<= 0] { 'name': XSD_NEGATIVE_INTEGER, 'python_type': int, 'base_type': XSD_NON_POSITIVE_INTEGER, - 'facets': [negative_int_validator, etree_element(XSD_MAX_INCLUSIVE, value='-1')] + 'facets': [negative_int_validator, Element(XSD_MAX_INCLUSIVE, value='-1')] }, # only negative value allowed [< 0] ) @@ -426,7 +425,7 @@ 'python_type': (datatypes.DateTimeStamp, str), 'base_type': XSD_DATETIME, 'to_python': datatypes.DateTime.fromstring, - 'facets': [etree_element(XSD_EXPLICIT_TIMEZONE, value='required')], + 'facets': [Element(XSD_EXPLICIT_TIMEZONE, value='required')], }, # [-][Y*]YYYY-MM-DD[Thh:mm:ss] with required timezone { 'name': XSD_DAY_TIME_DURATION, @@ -475,7 +474,7 @@ def xsd_builtin_types_factory( # xs:anySimpleType # Ref: https://www.w3.org/TR/xmlschema11-2/#builtin-stds xsd_any_simple_type = xsd_types[XSD_ANY_SIMPLE_TYPE] = XsdSimpleType( - 
elem=etree_element(XSD_SIMPLE_TYPE, name=XSD_ANY_SIMPLE_TYPE), + elem=Element(XSD_SIMPLE_TYPE, name=XSD_ANY_SIMPLE_TYPE), schema=meta_schema, parent=None, name=XSD_ANY_SIMPLE_TYPE @@ -484,7 +483,7 @@ def xsd_builtin_types_factory( # xs:anyAtomicType # Ref: https://www.w3.org/TR/xmlschema11-2/#builtin-stds xsd_types[XSD_ANY_ATOMIC_TYPE] = meta_schema.xsd_atomic_restriction_class( - elem=etree_element(XSD_SIMPLE_TYPE, name=XSD_ANY_ATOMIC_TYPE), + elem=Element(XSD_SIMPLE_TYPE, name=XSD_ANY_ATOMIC_TYPE), schema=meta_schema, parent=None, name=XSD_ANY_ATOMIC_TYPE, @@ -499,7 +498,7 @@ def xsd_builtin_types_factory( except KeyError: # If builtin type element is missing create a dummy element. Necessary for the # meta-schema XMLSchema.xsd of XSD 1.1, that not includes builtins declarations. - elem = etree_element(XSD_SIMPLE_TYPE, name=name, id=name) + elem = Element(XSD_SIMPLE_TYPE, name=name, id=name) else: elem, schema = value if schema is not meta_schema: diff --git a/xmlschema/validators/elements.py b/xmlschema/validators/elements.py index 0d8bd555..a17c0735 100644 --- a/xmlschema/validators/elements.py +++ b/xmlschema/validators/elements.py @@ -15,6 +15,7 @@ from decimal import Decimal from types import GeneratorType from typing import TYPE_CHECKING, cast, Any, Dict, Iterator, List, Optional, Tuple, Type, Union +from xml.etree.ElementTree import Element from elementpath import XPath2Parser, ElementPathError, XPathContext, XPathToken, \ LazyElementNode, SchemaElementNode, build_schema_node_tree @@ -24,7 +25,6 @@ from ..names import XSD_COMPLEX_TYPE, XSD_SIMPLE_TYPE, XSD_ALTERNATIVE, \ XSD_ELEMENT, XSD_ANY_TYPE, XSD_UNIQUE, XSD_KEY, XSD_KEYREF, XSI_NIL, \ XSI_TYPE, XSD_ERROR, XSD_NOTATION_TYPE -from ..etree import ElementData, etree_element from ..aliases import ElementType, SchemaType, BaseXsdType, SchemaElementType, \ ModelParticleType, ComponentClassType, AtomicValueType, DecodeType, \ IterDecodeType, IterEncodeType @@ -32,7 +32,7 @@ from ..helpers import get_qname, 
get_namespace, etree_iter_location_hints, \ raw_xml_encode, strictly_equal from .. import dataobjects -from ..converters import XMLSchemaConverter +from ..converters import ElementData, XMLSchemaConverter from ..xpath import XsdSchemaProtocol, XsdElementProtocol, XMLSchemaProxy, \ ElementPathMixin, XPathElement from ..resources import XMLResource @@ -113,7 +113,7 @@ class XsdElement(XsdComponent, ParticleMixin, binding: Optional[DataBindingType] = None - def __init__(self, elem: etree_element, + def __init__(self, elem: Element, schema: SchemaType, parent: Optional[XsdComponent] = None, build: bool = True) -> None: @@ -1332,12 +1332,12 @@ def get_type(self, elem: Union[ElementType, ElementData], if value is not None: attrib[k] = value - elem = etree_element(elem.tag, attrib=attrib) + elem = Element(elem.tag, attrib=attrib) else: - elem = etree_element(elem.tag) + elem = Element(elem.tag) if inherited: - dummy = etree_element('_dummy_element', attrib=inherited) + dummy = Element('_dummy_element', attrib=inherited) dummy.attrib.update(elem.attrib) for alt in self.alternatives: diff --git a/xmlschema/validators/exceptions.py b/xmlschema/validators/exceptions.py index d617bbab..cba53302 100644 --- a/xmlschema/validators/exceptions.py +++ b/xmlschema/validators/exceptions.py @@ -8,9 +8,9 @@ # @author Davide Brunato # from typing import TYPE_CHECKING, Any, Optional, cast, Iterable, Union, Callable +from elementpath.etree import etree_tostring from ..exceptions import XMLSchemaException, XMLSchemaWarning, XMLSchemaValueError -from ..etree import etree_tostring from ..aliases import ElementType, NamespacesType, SchemaElementType, ModelParticleType from ..helpers import get_prefixed_qname, etree_getpath, is_etree_element from ..translation import gettext as _ diff --git a/xmlschema/validators/facets.py b/xmlschema/validators/facets.py index b9bcd043..702ac97b 100644 --- a/xmlschema/validators/facets.py +++ b/xmlschema/validators/facets.py @@ -16,6 +16,8 @@ from abc import 
abstractmethod from typing import TYPE_CHECKING, cast, Any, List, Optional, Pattern, Union, \ MutableSequence, overload, Tuple +from xml.etree.ElementTree import Element + from elementpath import XPath2Parser, XPathContext, ElementPathError, \ translate_pattern, RegexError, ElementNode @@ -24,7 +26,6 @@ XSD_MIN_INCLUSIVE, XSD_MIN_EXCLUSIVE, XSD_TOTAL_DIGITS, XSD_FRACTION_DIGITS, \ XSD_ASSERTION, XSD_DECIMAL, XSD_EXPLICIT_TIMEZONE, XSD_NOTATION_TYPE, XSD_QNAME, \ XSD_ANNOTATION -from ..etree import etree_element from ..aliases import ElementType, SchemaType, AtomicValueType, BaseXsdType from ..translation import gettext as _ from ..helpers import count_digits, local_name @@ -777,7 +778,7 @@ class XsdAssertionFacet(XsdFacet): """ _ADMITTED_TAGS = {XSD_ASSERTION} - _root = ElementNode(elem=etree_element('root')) + _root = ElementNode(elem=Element('root')) def __repr__(self) -> str: return '%s(test=%r)' % (self.__class__.__name__, self.path) diff --git a/xmlschema/validators/groups.py b/xmlschema/validators/groups.py index 0493abbc..b2b743fa 100644 --- a/xmlschema/validators/groups.py +++ b/xmlschema/validators/groups.py @@ -14,16 +14,17 @@ from collections.abc import MutableMapping from typing import TYPE_CHECKING, overload, Any, Iterable, Iterator, List, \ MutableSequence, Optional, Tuple, Union +from xml.etree import ElementTree from .. 
import limits from ..exceptions import XMLSchemaValueError from ..names import XSD_GROUP, XSD_SEQUENCE, XSD_ALL, XSD_CHOICE, XSD_ELEMENT, \ XSD_ANY, XSI_TYPE, XSD_ANY_TYPE, XSD_ANNOTATION -from ..etree import etree_element, ElementData from ..aliases import ElementType, NamespacesType, SchemaType, IterDecodeType, \ IterEncodeType, ModelParticleType, SchemaElementType, ComponentClassType from ..translation import gettext as _ from ..helpers import get_qname, local_name, raw_xml_encode +from ..converters import ElementData from .exceptions import XMLSchemaModelError, XMLSchemaModelDepthError, \ XMLSchemaValidationError, XMLSchemaChildrenValidationError, \ @@ -37,7 +38,7 @@ if TYPE_CHECKING: from .complex_types import XsdComplexType -ANY_ELEMENT = etree_element( +ANY_ELEMENT = ElementTree.Element( XSD_ANY, attrib={ 'namespace': '##any', diff --git a/xmlschema/validators/schemas.py b/xmlschema/validators/schemas.py index cfd5be8a..e8ae381e 100644 --- a/xmlschema/validators/schemas.py +++ b/xmlschema/validators/schemas.py @@ -31,6 +31,7 @@ from itertools import chain from typing import cast, Callable, ItemsView, List, Optional, Dict, Any, \ Set, Union, Tuple, Type, Iterator, Counter +from xml.etree.ElementTree import Element, ParseError from elementpath import XPathToken, SchemaElementNode, build_schema_node_tree @@ -44,7 +45,6 @@ VC_NAMESPACE, SCHEMAS_DIR, LOCATION_HINTS, XSD_ANNOTATION, XSD_INCLUDE, \ XSD_IMPORT, XSD_REDEFINE, XSD_OVERRIDE, XSD_DEFAULT_OPEN_CONTENT, \ XSD_ANY_SIMPLE_TYPE, XSD_UNION, XSD_LIST, XSD_RESTRICTION -from ..etree import etree_element, ParseError from ..aliases import ElementType, XMLSourceType, NamespacesType, LocationsType, \ SchemaType, SchemaSourceType, ConverterType, ComponentClassType, DecodeType, \ EncodeType, BaseXsdType, AtomicValueType, ExtraValidatorType, SchemaGlobalType @@ -81,12 +81,12 @@ DRIVE_PATTERN = re.compile(r'^[a-zA-Z]:$') # Elements for building dummy groups -ATTRIBUTE_GROUP_ELEMENT = etree_element(XSD_ATTRIBUTE_GROUP) 
-ANY_ATTRIBUTE_ELEMENT = etree_element( +ATTRIBUTE_GROUP_ELEMENT = Element(XSD_ATTRIBUTE_GROUP) +ANY_ATTRIBUTE_ELEMENT = Element( XSD_ANY_ATTRIBUTE, attrib={'namespace': '##any', 'processContents': 'lax'} ) -SEQUENCE_ELEMENT = etree_element(XSD_SEQUENCE) -ANY_ELEMENT = etree_element( +SEQUENCE_ELEMENT = Element(XSD_SEQUENCE) +ANY_ELEMENT = Element( XSD_ANY, attrib={ 'namespace': '##any', @@ -874,11 +874,11 @@ def create_any_content_group(self, parent: Union[XsdComplexType, XsdGroup], def create_empty_content_group(self, parent: Union[XsdComplexType, XsdGroup], model: str = 'sequence', **attrib: Any) -> XsdGroup: if model == 'sequence': - group_elem = etree_element(XSD_SEQUENCE, **attrib) + group_elem = Element(XSD_SEQUENCE, **attrib) elif model == 'choice': - group_elem = etree_element(XSD_CHOICE, **attrib) + group_elem = Element(XSD_CHOICE, **attrib) elif model == 'all': - group_elem = etree_element(XSD_ALL, **attrib) + group_elem = Element(XSD_ALL, **attrib) else: msg = _("'model' argument must be (sequence | choice | all)") raise XMLSchemaValueError(msg) @@ -918,7 +918,7 @@ def create_any_type(self) -> XsdComplexType: """ schema = self.meta_schema or self any_type = self.xsd_complex_type_class( - elem=etree_element(XSD_COMPLEX_TYPE, name=XSD_ANY_TYPE), + elem=Element(XSD_COMPLEX_TYPE, name=XSD_ANY_TYPE), schema=schema, parent=None, mixed=True, block='', final='' ) assert isinstance(any_type.content, XsdGroup) @@ -939,7 +939,7 @@ def create_element(self, name: str, parent: Optional[XsdComponent] = None, Used as dummy element for validation/decoding/encoding operations of wildcards and complex types. 
""" - elem = etree_element(XSD_ELEMENT, name=name, **attrib) + elem = Element(XSD_ELEMENT, name=name, **attrib) if text is not None: elem.text = text return self.xsd_element_class(elem=elem, schema=self, parent=parent) diff --git a/xmlschema/validators/simple_types.py b/xmlschema/validators/simple_types.py index 80769529..dfa25eec 100644 --- a/xmlschema/validators/simple_types.py +++ b/xmlschema/validators/simple_types.py @@ -13,8 +13,8 @@ from decimal import DecimalException from typing import cast, Any, Callable, Dict, Iterator, List, \ Optional, Set, Union, Tuple, Type +from xml.etree import ElementTree -from ..etree import etree_element from ..aliases import ElementType, AtomicValueType, ComponentClassType, \ IterDecodeType, IterEncodeType, BaseXsdType, SchemaType, DecodedValueType, \ EncodedValueType @@ -789,7 +789,7 @@ class XsdList(XsdSimpleType): """ base_type: XsdSimpleType _ADMITTED_TAGS = {XSD_LIST} - _white_space_elem = etree_element( + _white_space_elem = ElementTree.Element( XSD_WHITE_SPACE, attrib={'value': 'collapse', 'fixed': 'true'} ) diff --git a/xmlschema/validators/xsdbase.py b/xmlschema/validators/xsdbase.py index 826bdc33..34859dcc 100644 --- a/xmlschema/validators/xsdbase.py +++ b/xmlschema/validators/xsdbase.py @@ -13,14 +13,15 @@ import re from typing import TYPE_CHECKING, cast, Any, Dict, Generic, List, Iterator, Optional, \ Set, Tuple, TypeVar, Union, MutableMapping +from xml.etree import ElementTree -import elementpath +from elementpath import select +from elementpath.etree import is_etree_element, etree_tostring from ..exceptions import XMLSchemaValueError, XMLSchemaTypeError from ..names import XSD_ANNOTATION, XSD_APPINFO, XSD_DOCUMENTATION, \ XSD_ANY_TYPE, XSD_ANY_SIMPLE_TYPE, XSD_ANY_ATOMIC_TYPE, XSD_ID, \ XSD_QNAME, XSD_OVERRIDE, XSD_NOTATION_TYPE, XSD_DECIMAL -from ..etree import is_etree_element, etree_tostring, etree_element from ..aliases import ElementType, NamespacesType, SchemaType, BaseXsdType, \ ComponentClassType, 
ExtraValidatorType, DecodeType, IterDecodeType, \ EncodeType, IterEncodeType @@ -67,7 +68,7 @@ class XsdValidator: :ivar errors: XSD validator building errors. :vartype errors: list """ - elem: Optional[etree_element] = None + elem: Optional[ElementTree.Element] = None namespaces: Any = None errors: List[XMLSchemaParseError] @@ -275,7 +276,7 @@ class XsdComponent(XsdValidator): _REGEX_SPACES = re.compile(r'\s+') _ADMITTED_TAGS: Union[Set[str], Tuple[str, ...], Tuple[()]] = () - elem: etree_element + elem: ElementTree.Element parent = None name = None ref: Optional['XsdComponent'] = None @@ -284,7 +285,7 @@ class XsdComponent(XsdValidator): _annotation = None _target_namespace: Optional[str] - def __init__(self, elem: etree_element, + def __init__(self, elem: ElementTree.Element, schema: SchemaType, parent: Optional['XsdComponent'] = None, name: Optional[str] = None) -> None: @@ -672,7 +673,7 @@ def __repr__(self) -> str: return '%s(%r)' % (self.__class__.__name__, str(self)[:40]) def __str__(self) -> str: - return '\n'.join(elementpath.select(self.elem, '*/fn:string()')) + return '\n'.join(select(self.elem, '*/fn:string()')) @property def built(self) -> bool: From 8a9844f2b51a16f836d2d8c2da6a0a913f09ef59 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Sat, 16 Jul 2022 09:34:57 +0200 Subject: [PATCH 13/17] Restore correctness of type annotations --- xmlschema/resources.py | 29 +++++++++++++++-------------- xmlschema/testing/_case_class.py | 9 +++++++-- xmlschema/validators/assertions.py | 17 +++++++++-------- xmlschema/validators/elements.py | 26 +++++++++++++++----------- xmlschema/validators/identities.py | 2 +- xmlschema/validators/schemas.py | 1 + xmlschema/validators/wildcards.py | 17 +++++++++-------- xmlschema/xpath.py | 6 +++--- 8 files changed, 60 insertions(+), 47 deletions(-) diff --git a/xmlschema/resources.py b/xmlschema/resources.py index c1b19d46..79a7cf5c 100644 --- a/xmlschema/resources.py +++ b/xmlschema/resources.py @@ -23,6 +23,7 @@ from 
elementpath import XPathToken, XPathContext, XPath2Parser, ElementNode, \ LazyElementNode, DocumentNode, build_lxml_node_tree, build_node_tree from elementpath.etree import ElementTree, PyElementTree, SafeXMLParser, etree_tostring +from elementpath.protocols import LxmlElementProtocol from .exceptions import XMLSchemaTypeError, XMLSchemaValueError, XMLResourceError from .names import XML_NAMESPACE @@ -545,7 +546,8 @@ def _access_control(self, url: str) -> None: if not url.startswith(normalize_url(self._base_url)): raise XMLResourceError("block access to out of sandbox file {}".format(url)) - def _track_nsmap(self, elements, nsmap): + def _track_nsmap(self, elements: Iterator[ElementType], + nsmap: NsmapType) -> Iterator[ElementType]: _nsmap = None for elem in elements: try: @@ -836,7 +838,7 @@ def _build_node_tree(self, namespaces: Optional[NamespacesType] = None) \ -> Union[DocumentNode, ElementNode]: """Build a node tree for non-lazy resources.""" if hasattr(self._root, 'xpath'): - return build_lxml_node_tree(self._root) + return build_lxml_node_tree(cast(LxmlElementProtocol, self._root)) else: try: _nsmap = self._nsmap[self._root] @@ -844,13 +846,13 @@ def _build_node_tree(self, namespaces: Optional[NamespacesType] = None) \ # A resource based on an ElementTree structure (no namespace maps) return build_node_tree(self._root, namespaces) else: - _namespaces = {pfx: uri for pfx, uri in _nsmap} + _namespaces: Any = {pfx: uri for pfx, uri in _nsmap} node_tree = build_node_tree(self._root, _namespaces) # Update namespace maps for node in node_tree.iter_descendants(with_self=False): if isinstance(node, ElementNode): - elem_nsmap = self._nsmap[node.elem] + elem_nsmap = self._nsmap[cast(ElementType, node.elem)] if _nsmap is not elem_nsmap: _nsmap = elem_nsmap _namespaces = {pfx: uri for pfx, uri in _nsmap} @@ -1162,7 +1164,7 @@ def iter_depth(self, mode: int = 1, nsmap: Optional[NsmapType] = None, # reset the whole XPath tree to let it still usable if other # children 
are added to the root by ElementTree.iterparse(). - self._xpath_root.children.clear() + self.xpath_root.children.clear() finally: if self._source is not resource: resource.close() @@ -1174,7 +1176,7 @@ def _select_elements(token: XPathToken, node: ResourceNodeType) -> Iterator[Elem if not isinstance(item, ElementNode): msg = "XPath expressions on XML resources can select only elements" raise XMLResourceError(msg) - yield item.elem + yield cast(ElementType, item.elem) def _select_ancestors(self, token: XPathToken, node: ResourceNodeType, ancestors: List[ElementType]) -> Iterator[ElementType]: @@ -1186,18 +1188,17 @@ def _select_ancestors(self, token: XPathToken, node: ResourceNodeType, elif item.elem is self._root: ancestors.clear() else: - _ancestors = [] + _ancestors: Any = [] parent = item.parent while parent is not None: - if parent is not None: - _ancestors.append(parent.elem) - parent = parent.parent + _ancestors.append(parent.value) + parent = parent.parent if _ancestors: ancestors.clear() ancestors.extend(reversed(_ancestors)) - yield item.elem + yield cast(ElementType, item.elem) def iterfind(self, path: str, namespaces: Optional[NamespacesType] = None, @@ -1243,7 +1244,7 @@ def iterfind(self, path: str, if subtree_level: pass elif select_all or \ - node in self._select_elements(token, self._xpath_root): + node in self._select_elements(token, self.xpath_root): yield node elif not subtree_level: continue @@ -1252,11 +1253,11 @@ def iterfind(self, path: str, ancestors.pop() continue # pragma: no cover elif select_all or \ - node in self._select_elements(token, self._xpath_root): + node in self._select_elements(token, self.xpath_root): yield node del node[:] # delete children, keep attributes, text and tail. 
- self._xpath_root.children.clear() # reset XPath tree + self.xpath_root.children.clear() # reset XPath tree finally: if self._source is not resource: diff --git a/xmlschema/testing/_case_class.py b/xmlschema/testing/_case_class.py index bba40769..7815f1b1 100644 --- a/xmlschema/testing/_case_class.py +++ b/xmlschema/testing/_case_class.py @@ -17,8 +17,6 @@ from textwrap import dedent from xml.etree.ElementTree import Element, iselement -from elementpath.etree import is_etree_element - from xmlschema.exceptions import XMLSchemaValueError from xmlschema.names import XSD_NAMESPACE, XSI_NAMESPACE, XSD_SCHEMA from xmlschema.helpers import get_namespace @@ -41,6 +39,13 @@ class XsdValidatorTestCase(unittest.TestCase): TEST_CASES_DIR = None schema_class = XMLSchema10 + vh_xsd_file: str + vh_xml_file: str + col_xsd_file: str + col_xml_file: str + st_xsd_file: str + models_xsd_file: str + @classmethod def setUpClass(cls): cls.errors = [] diff --git a/xmlschema/validators/assertions.py b/xmlschema/validators/assertions.py index 0f6fe5cd..b0e92123 100644 --- a/xmlschema/validators/assertions.py +++ b/xmlschema/validators/assertions.py @@ -170,11 +170,12 @@ def xpath_proxy(self) -> 'XMLSchemaProxy': @property def xpath_node(self) -> SchemaElementNode: schema_node = self.schema.xpath_node - try: - return cast(SchemaElementNode, schema_node.elements[self]) - except KeyError: - return build_schema_node_tree( - root=cast(XsdElementProtocol, self), - elements=schema_node.elements, - global_elements=schema_node.children, - ) + node = schema_node.get_element_node(cast(XsdElementProtocol, self)) + if isinstance(node, SchemaElementNode): + return node + + return build_schema_node_tree( + root=cast(XsdElementProtocol, self), + elements=schema_node.elements, + global_elements=schema_node.children, + ) diff --git a/xmlschema/validators/elements.py b/xmlschema/validators/elements.py index a17c0735..404d90d5 100644 --- a/xmlschema/validators/elements.py +++ 
b/xmlschema/validators/elements.py @@ -18,7 +18,7 @@ from xml.etree.ElementTree import Element from elementpath import XPath2Parser, ElementPathError, XPathContext, XPathToken, \ - LazyElementNode, SchemaElementNode, build_schema_node_tree + ElementNode, LazyElementNode, SchemaElementNode, build_schema_node_tree from elementpath.datatypes import AbstractDateTime, Duration, AbstractBinary from ..exceptions import XMLSchemaTypeError, XMLSchemaValueError @@ -395,14 +395,15 @@ def xpath_proxy(self) -> XMLSchemaProxy: @property def xpath_node(self) -> SchemaElementNode: schema_node = self.schema.xpath_node - try: - return cast(SchemaElementNode, schema_node.elements[self]) - except KeyError: - return build_schema_node_tree( - root=cast(XsdElementProtocol, self), - elements=schema_node.elements, - global_elements=schema_node.children, - ) + node = schema_node.get_element_node(cast(XsdElementProtocol, self)) + if isinstance(node, SchemaElementNode): + return node + + return build_schema_node_tree( + root=cast(XsdElementProtocol, self), + elements=schema_node.elements, + global_elements=schema_node.children, + ) def build(self) -> None: if self._build: @@ -675,7 +676,10 @@ def iter_decode(self, obj: ElementType, validation: str = 'lax', **kwargs: Any) if isinstance(identity.elements, tuple): continue # Skip unbuilt identities - context = XPathContext(self.schema.xpath_node, item=xpath_element) + context = XPathContext( + root=self.schema.xpath_node, + item=cast(XsdElementProtocol, xpath_element) + ) for e in identity.selector.token.select_results(context): if not isinstance(e, XsdElement): @@ -843,7 +847,7 @@ def iter_decode(self, obj: ElementType, validation: str = 'lax', **kwargs: Any) if content is not None: del content - element_node = None + element_node: Union[None, ElementNode, LazyElementNode] = None # Collect field values for identities that refer to this element. 
for identity, counter in identities.items(): diff --git a/xmlschema/validators/identities.py b/xmlschema/validators/identities.py index 2f31178e..108248a0 100644 --- a/xmlschema/validators/identities.py +++ b/xmlschema/validators/identities.py @@ -252,7 +252,7 @@ def get_fields(self, element_node: ElementNode, """ fields: List[IdentityFieldItemType] = [] - def append_fields(): + def append_fields() -> None: if isinstance(value, list): fields.append(tuple(value)) elif isinstance(value, bool): diff --git a/xmlschema/validators/schemas.py b/xmlschema/validators/schemas.py index e8ae381e..caf0b312 100644 --- a/xmlschema/validators/schemas.py +++ b/xmlschema/validators/schemas.py @@ -277,6 +277,7 @@ class XMLSchemaBase(XsdValidator, ElementPathMixin[Union[SchemaType, XsdElement] fallback_locations: Dict[str, str] = LOCATION_HINTS.copy() _locations: Tuple[Tuple[str, str], ...] = () _annotations = None + _xpath_node: Optional[SchemaElementNode] # XSD components classes xsd_notation_class = XsdNotation diff --git a/xmlschema/validators/wildcards.py b/xmlschema/validators/wildcards.py index 3e558e09..ce989fcb 100644 --- a/xmlschema/validators/wildcards.py +++ b/xmlschema/validators/wildcards.py @@ -432,14 +432,15 @@ def xpath_proxy(self) -> XMLSchemaProxy: @property def xpath_node(self) -> SchemaElementNode: schema_node = self.schema.xpath_node - try: - return cast(SchemaElementNode, schema_node.elements[self]) - except KeyError: - return build_schema_node_tree( - root=cast(XsdElementProtocol, self), - elements=schema_node.elements, - global_elements=schema_node.children, - ) + node = schema_node.get_element_node(cast(XsdElementProtocol, self)) + if isinstance(node, SchemaElementNode): + return node + + return build_schema_node_tree( + root=cast(XsdElementProtocol, self), + elements=schema_node.elements, + global_elements=schema_node.children, + ) def _parse(self) -> None: super(XsdAnyElement, self)._parse() diff --git a/xmlschema/xpath.py b/xmlschema/xpath.py index 
70b154ea..c0634634 100644 --- a/xmlschema/xpath.py +++ b/xmlschema/xpath.py @@ -131,7 +131,7 @@ class ElementPathMixin(Sequence[E]): attributes: Any = {} namespaces: Any = {} xpath_default_namespace = '' - _xpath_node: Optional[SchemaElementNode] = None + _xpath_node: Optional[Union[SchemaElementNode, LazyElementNode]] = None @abstractmethod def __iter__(self) -> Iterator[E]: @@ -175,7 +175,7 @@ def xpath_proxy(self) -> XMLSchemaProxy: raise NotImplementedError @property - def xpath_node(self) -> SchemaElementNode: + def xpath_node(self) -> Union[SchemaElementNode, LazyElementNode]: """Returns an XPath node for applying selectors on XSD schema/component.""" raise NotImplementedError @@ -311,7 +311,7 @@ def xpath_proxy(self) -> XMLSchemaProxy: @property def xpath_node(self) -> LazyElementNode: if self._xpath_node is None: - self._xpath_node = LazyElementNode(self) + self._xpath_node = LazyElementNode(cast(XsdElementProtocol, self)) return self._xpath_node @property From f9b8167152869ae8548a2d4737ab62cd1d266454 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Sat, 16 Jul 2022 12:44:49 +0200 Subject: [PATCH 14/17] Fix linting errors in tests and trailing line in bindings.py.jinja --- tests/check_memory.py | 8 +-- .../examples/collection/collection.py | 1 - tests/test_codegen.py | 18 ++++-- tests/test_converters.py | 7 +-- tests/test_helpers.py | 2 +- tests/test_resources.py | 10 ++-- tests/test_wsdl.py | 8 +-- tests/validation/test_decoding.py | 9 ++- tests/validation/test_encoding.py | 7 ++- tests/validation/test_validation.py | 4 +- tests/validators/test_attributes.py | 6 +- tests/validators/test_complex_types.py | 6 +- tests/validators/test_facets.py | 58 +++++++++++-------- tests/validators/test_identities.py | 2 +- tests/validators/test_models.py | 10 ++-- tests/validators/test_particles.py | 2 +- tests/validators/test_schemas.py | 38 ++++++------ tests/validators/test_simple_types.py | 4 +- tests/validators/test_wildcards.py | 2 +- 
tests/validators/test_xsdbase.py | 28 ++++----- tox.ini | 4 +- .../extras/templates/python/bindings.py.jinja | 8 +-- 22 files changed, 130 insertions(+), 112 deletions(-) diff --git a/tests/check_memory.py b/tests/check_memory.py index 8d1f60c6..02b115d4 100755 --- a/tests/check_memory.py +++ b/tests/check_memory.py @@ -53,10 +53,10 @@ def test_choice_type(value): def import_package(): # Imports of packages used by xmlschema that # have a significant memory usage impact. - import decimal - from urllib.error import URLError - import lxml.etree - import elementpath + import decimal # noqa + from urllib.error import URLError # noqa + import lxml.etree # noqa + import elementpath # noqa import xmlschema return xmlschema diff --git a/tests/test_cases/examples/collection/collection.py b/tests/test_cases/examples/collection/collection.py index a344a3b0..70382b32 100644 --- a/tests/test_cases/examples/collection/collection.py +++ b/tests/test_cases/examples/collection/collection.py @@ -24,4 +24,3 @@ class CollectionBinding(DataElement, metaclass=DataBindingMeta): class PersonBinding(DataElement, metaclass=DataBindingMeta): xsd_element = schema.elements['person'] - diff --git a/tests/test_codegen.py b/tests/test_codegen.py index 1796f9be..500180b1 100644 --- a/tests/test_codegen.py +++ b/tests/test_codegen.py @@ -84,7 +84,7 @@ def casepath(relative_path): XSD_TEST = """\ @@ -112,7 +112,7 @@ def casepath(relative_path): - + @@ -127,6 +127,13 @@ class TestAbstractGenerator(unittest.TestCase): schema_class = XMLSchema10 generator_class = DemoGenerator + schema: XMLSchema10 + searchpath: Path + col_dir: str + col_xsd_file: str + col_xml_file: str + col_schema: XMLSchema10 + @classmethod def setUpClass(cls): cls.schema = cls.schema_class(XSD_TEST) @@ -232,7 +239,7 @@ def test_list_templates(self): language = self.generator_class.formal_language.lower() templates = set(x.name for x in template_dir.glob('{}/*'.format(language))) - templates.update(x.name for x in 
template_dir.glob('filters/*'.format(language))) + templates.update(x.name for x in template_dir.glob('filters/*')) self.assertSetEqual(set(self.generator.list_templates()), templates) def test_matching_templates(self): @@ -500,7 +507,7 @@ def test_sort_types_filter(self): - + @@ -572,10 +579,9 @@ def test_language_type_filter(self): def test_list_templates(self): template_dir = Path(__file__).parent.joinpath('templates') - language = self.generator_class.formal_language.lower() templates = {'sample.py.jinja', 'bindings.py.jinja'} - templates.update(x.name for x in template_dir.glob('filters/*'.format(language))) + templates.update(x.name for x in template_dir.glob('filters/*')) self.assertSetEqual(set(self.generator.list_templates()), templates) def test_sample_module(self): diff --git a/tests/test_converters.py b/tests/test_converters.py index a7956b67..254963c6 100644 --- a/tests/test_converters.py +++ b/tests/test_converters.py @@ -104,7 +104,7 @@ def test_cdata_mapping(self): - + """) self.assertEqual( @@ -117,7 +117,7 @@ def test_cdata_mapping(self): def test_preserve_root__issue_215(self): schema = XMLSchema(""" - @@ -128,8 +128,7 @@ def test_preserve_root__issue_215(self): - - """) + """) xml_data = """""" diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 88be13a7..702d153a 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -416,7 +416,7 @@ def test_iter_location_hints(self): [('http://example.com/xmlschema/ns-A', 'import-case4a.xsd')] ) elem = ElementTree.XML( - """""" ) self.assertListEqual( diff --git a/tests/test_resources.py b/tests/test_resources.py index b31cc4d9..1200d9cd 100644 --- a/tests/test_resources.py +++ b/tests/test_resources.py @@ -195,8 +195,10 @@ def test_normalize_url_windows(self): def test_normalize_url_unc_paths__issue_246(self): url = PureWindowsPath(r'\\host\share\file.xsd').as_uri() - self.assertNotEqual(normalize_url(r'\\host\share\file.xsd'), url) # file://host/share/file.xsd - 
self.assertEqual(normalize_url(r'\\host\share\file.xsd'), url.replace('file://', 'file:////')) + self.assertNotEqual(normalize_url(r'\\host\share\file.xsd'), + url) # file://host/share/file.xsd + self.assertEqual(normalize_url(r'\\host\share\file.xsd'), + url.replace('file://', 'file:////')) def test_normalize_url_unc_paths__issue_268(self,): unc_path = r'\\filer01\MY_HOME\dev\XMLSCHEMA\test.xsd' @@ -298,9 +300,9 @@ def test_normalize_url_slashes(self): self.assertEqual(normalize_url('//root/dir1/schema.xsd'), 'file:////root/dir1/schema.xsd') self.assertEqual(normalize_url('dir2/schema.xsd', '//root/dir1/'), - f'file:////root/dir1/dir2/schema.xsd') + 'file:////root/dir1/dir2/schema.xsd') self.assertEqual(normalize_url('dir2/schema.xsd', '//root/dir1'), - f'file:////root/dir1/dir2/schema.xsd') + 'file:////root/dir1/dir2/schema.xsd') def test_normalize_url_hash_character(self): url = normalize_url('issue #000.xml', 'file:///dir1/dir2/') diff --git a/tests/test_wsdl.py b/tests/test_wsdl.py index 0498d45b..a80a49a4 100644 --- a/tests/test_wsdl.py +++ b/tests/test_wsdl.py @@ -95,7 +95,7 @@ def casepath(relative_path): WSDL_DOCUMENT_NO_SOAP = """ @@ -401,7 +401,7 @@ def test_wsdl_document_imports(self): def test_wsdl_document_invalid_imports(self): wsdl_template = """ - """ @@ -425,7 +425,7 @@ def test_wsdl_document_invalid_imports(self): self.assertIn('no element found', str(ctx.exception)) wsdl_template = """ - @@ -437,7 +437,7 @@ def test_wsdl_document_invalid_imports(self): self.assertIn('namespace to import must be different', str(ctx.exception)) wsdl_template = """ - diff --git a/tests/validation/test_decoding.py b/tests/validation/test_decoding.py index 9f1f93f5..33472f57 100644 --- a/tests/validation/test_decoding.py +++ b/tests/validation/test_decoding.py @@ -1,5 +1,4 @@ - -#!/usr/bin/env python +# !/usr/bin/env python # # Copyright (c), 2016-2020, SISSA (International School for Advanced Studies). # All rights reserved. 
@@ -631,8 +630,8 @@ def ascii_strings(value, xsd_type): def test_non_global_schema_path(self): # Issue #157 xs = self.schema_class(""" - @@ -979,7 +978,7 @@ def test_nillable__issue_076(self): def test_default_namespace__issue_077(self): xs = self.schema_class(""" - """) diff --git a/tests/validation/test_encoding.py b/tests/validation/test_encoding.py index 5f74d0bc..39bd6a66 100644 --- a/tests/validation/test_encoding.py +++ b/tests/validation/test_encoding.py @@ -486,7 +486,6 @@ def test_xsi_type_and_attributes_unmap__issue_214(self): - @@ -500,7 +499,7 @@ def test_xsi_type_and_attributes_unmap__issue_214(self): xml1 = """alpha""" self.assertEqual(schema.decode(xml1), 'alpha') - xml2 = """alpha""" @@ -645,7 +644,9 @@ def test_lxml_encode(self): etree_tostring(elem, namespaces=self.col_namespaces), dedent( """\ - + 2 diff --git a/tests/validation/test_validation.py b/tests/validation/test_validation.py index c23064ee..9680ad14 100644 --- a/tests/validation/test_validation.py +++ b/tests/validation/test_validation.py @@ -235,7 +235,7 @@ def test_issue_183(self): <xs:element name="elem2" type="xs:string"/> <xs:element name="elem3" type="xs:string"/> <xs:element name="elem4" type="xs:string"/> - + <xs:element name="root" type="enumType"/> <xs:simpleType name="enumType"> @@ -244,7 +244,7 @@ def test_issue_183(self): <xs:enumeration value="elem2"/> <xs:enumeration value="tns1:other1"/> <xs:enumeration value="elem3"/> - <xs:enumeration value="tns2:other2"/> + <xs:enumeration value="tns2:other2"/> <xs:enumeration value="elem4"/> </xs:restriction> </xs:simpleType> diff --git a/tests/validators/test_attributes.py b/tests/validators/test_attributes.py index 8edb6d26..c21f95cb 100644 --- a/tests/validators/test_attributes.py +++ b/tests/validators/test_attributes.py @@ -160,7 +160,7 @@ def test_name_attribute(self): with self.assertRaises(XMLSchemaParseError) as ctx: self.schema_class("""<xs:schema - xmlns:xs="http://www.w3.org/2001/XMLSchema" + 
xmlns:xs="http://www.w3.org/2001/XMLSchema" targetNamespace="http://www.w3.org/2001/XMLSchema-instance" > <xs:attribute name="phone" type="xs:string"/> </xs:schema>""") @@ -525,7 +525,7 @@ def test_target_namespace(self): xs = self.get_schema(dedent("""\ <xs:attributeGroup name="attrs"> - <xs:attribute name="a" type="xs:string" + <xs:attribute name="a" type="xs:string" targetNamespace="http://xmlschema.test/ns"/> <xs:attribute ref="b"/> </xs:attributeGroup> @@ -542,7 +542,7 @@ def test_prohibited_and_fixed_incompatibility(self): with self.assertRaises(XMLSchemaParseError) as ec: self.get_schema(dedent("""\ <xs:attributeGroup name="attrs"> - <xs:attribute name="a" type="xs:string" + <xs:attribute name="a" type="xs:string" use="prohibited" fixed="foo"/> </xs:attributeGroup>""")) diff --git a/tests/validators/test_complex_types.py b/tests/validators/test_complex_types.py index 213d55d8..8c9b099e 100644 --- a/tests/validators/test_complex_types.py +++ b/tests/validators/test_complex_types.py @@ -430,11 +430,11 @@ def test_content_type_property(self): def test_is_empty(self): schema = self.check_schema(""" <xs:complexType name="emptyType1"/> - + <xs:complexType name="emptyType2"> <xs:sequence/> </xs:complexType> - + <xs:complexType name="emptyType3"> <xs:complexContent> <xs:restriction base="xs:anyType"/> @@ -446,7 +446,7 @@ def test_is_empty(self): <xs:element name="elem1"/> </xs:sequence> </xs:complexType> - + <xs:complexType name="notEmptyType2"> <xs:complexContent> <xs:extension base="xs:anyType"/> diff --git a/tests/validators/test_facets.py b/tests/validators/test_facets.py index 0b4b436e..2b1d0667 100644 --- a/tests/validators/test_facets.py +++ b/tests/validators/test_facets.py @@ -20,11 +20,14 @@ XSD_WHITE_SPACE, XSD_MIN_INCLUSIVE, XSD_MIN_EXCLUSIVE, XSD_MAX_INCLUSIVE, \ XSD_MAX_EXCLUSIVE, XSD_TOTAL_DIGITS, XSD_FRACTION_DIGITS, XSD_ENUMERATION, \ XSD_PATTERN, XSD_ASSERTION +from xmlschema.validators import XsdEnumerationFacets, XsdPatternFacets, 
XsdAssertionFacet class TestXsdFacets(unittest.TestCase): schema_class = XMLSchema10 + st_xsd_file: pathlib.Path + st_schema: XMLSchema10 @classmethod def setUpClass(cls): @@ -237,7 +240,7 @@ def test_min_length_facet_restriction(self): <xs:restriction base="string20"> <xs:minLength value="30"/> </xs:restriction> - </xs:simpleType> + </xs:simpleType> </xs:schema>""")) self.assertEqual(schema.types['string20'].get_facet(XSD_MIN_LENGTH).value, 20) @@ -255,7 +258,7 @@ def test_min_length_facet_restriction(self): <xs:restriction base="string40"> <xs:minLength value="30"/> </xs:restriction> - </xs:simpleType> + </xs:simpleType> </xs:schema>""")) def test_max_length_facet(self): @@ -298,7 +301,7 @@ def test_max_length_facet_restriction(self): <xs:restriction base="string30"> <xs:maxLength value="20"/> </xs:restriction> - </xs:simpleType> + </xs:simpleType> </xs:schema>""")) self.assertEqual(schema.types['string30'].get_facet(XSD_MAX_LENGTH).value, 30) @@ -316,7 +319,7 @@ def test_max_length_facet_restriction(self): <xs:restriction base="string30"> <xs:maxLength value="40"/> </xs:restriction> - </xs:simpleType> + </xs:simpleType> </xs:schema>""")) def test_min_inclusive_facet(self): @@ -363,7 +366,7 @@ def test_min_inclusive_facet_restriction(self): <xs:restriction base="type1"> <xs:minInclusive value="0"/> </xs:restriction> - </xs:simpleType> + </xs:simpleType> </xs:schema>""")) facet = schema.types['type1'].get_facet('{%s}%s' % (XSD_NAMESPACE, base_facet)) @@ -385,7 +388,7 @@ def test_min_inclusive_facet_restriction(self): <xs:restriction base="type1"> <xs:minInclusive value="0"/> </xs:restriction> - </xs:simpleType> + </xs:simpleType> </xs:schema>""")) for base_facet in ['maxInclusive', 'maxExclusive']: @@ -401,7 +404,7 @@ def test_min_inclusive_facet_restriction(self): <xs:restriction base="type1"> <xs:minInclusive value="0"/> </xs:restriction> - </xs:simpleType> + </xs:simpleType> </xs:schema>""")) for base_facet in ['minExclusive', 'maxExclusive']: @@ -417,7 +420,7 
@@ def test_min_inclusive_facet_restriction(self): <xs:restriction base="type1"> <xs:minInclusive value="0"/> </xs:restriction> - </xs:simpleType> + </xs:simpleType> </xs:schema>""")) def test_min_exclusive_facet(self): @@ -463,7 +466,7 @@ def test_min_exclusive_facet_restriction(self): <xs:restriction base="type1"> <xs:minExclusive value="0"/> </xs:restriction> - </xs:simpleType> + </xs:simpleType> </xs:schema>""")) facet = schema.types['type1'].get_facet('{%s}%s' % (XSD_NAMESPACE, base_facet)) @@ -485,7 +488,7 @@ def test_min_exclusive_facet_restriction(self): <xs:restriction base="type1"> <xs:minExclusive value="0"/> </xs:restriction> - </xs:simpleType> + </xs:simpleType> </xs:schema>""")) for base_facet in ['minInclusive', 'minExclusive']: @@ -501,7 +504,7 @@ def test_min_exclusive_facet_restriction(self): <xs:restriction base="type1"> <xs:minExclusive value="0"/> </xs:restriction> - </xs:simpleType> + </xs:simpleType> </xs:schema>""")) for base_facet in ['maxInclusive', 'maxExclusive']: @@ -517,7 +520,7 @@ def test_min_exclusive_facet_restriction(self): <xs:restriction base="type1"> <xs:minExclusive value="0"/> </xs:restriction> - </xs:simpleType> + </xs:simpleType> </xs:schema>""")) def test_max_inclusive_facet(self): @@ -564,7 +567,7 @@ def test_max_inclusive_facet_restriction(self): <xs:restriction base="type1"> <xs:maxInclusive value="0"/> </xs:restriction> - </xs:simpleType> + </xs:simpleType> </xs:schema>""")) facet = schema.types['type1'].get_facet('{%s}%s' % (XSD_NAMESPACE, base_facet)) @@ -586,7 +589,7 @@ def test_max_inclusive_facet_restriction(self): <xs:restriction base="type1"> <xs:maxInclusive value="0"/> </xs:restriction> - </xs:simpleType> + </xs:simpleType> </xs:schema>""")) for base_facet in ['minInclusive', 'minExclusive']: @@ -602,7 +605,7 @@ def test_max_inclusive_facet_restriction(self): <xs:restriction base="type1"> <xs:maxInclusive value="0"/> </xs:restriction> - </xs:simpleType> + </xs:simpleType> </xs:schema>""")) for base_facet in 
['minExclusive', 'maxExclusive']: @@ -618,7 +621,7 @@ def test_max_inclusive_facet_restriction(self): <xs:restriction base="type1"> <xs:maxInclusive value="0"/> </xs:restriction> - </xs:simpleType> + </xs:simpleType> </xs:schema>""")) def test_max_exclusive_facet(self): @@ -664,7 +667,7 @@ def test_max_exclusive_facet_restriction(self): <xs:restriction base="type1"> <xs:maxExclusive value="0"/> </xs:restriction> - </xs:simpleType> + </xs:simpleType> </xs:schema>""")) facet = schema.types['type1'].get_facet('{%s}%s' % (XSD_NAMESPACE, base_facet)) @@ -686,7 +689,7 @@ def test_max_exclusive_facet_restriction(self): <xs:restriction base="type1"> <xs:maxExclusive value="0"/> </xs:restriction> - </xs:simpleType> + </xs:simpleType> </xs:schema>""")) for base_facet in ['maxInclusive', 'maxExclusive']: @@ -702,7 +705,7 @@ def test_max_exclusive_facet_restriction(self): <xs:restriction base="type1"> <xs:maxExclusive value="0"/> </xs:restriction> - </xs:simpleType> + </xs:simpleType> </xs:schema>""")) for base_facet in ['minInclusive', 'minExclusive']: @@ -718,7 +721,7 @@ def test_max_exclusive_facet_restriction(self): <xs:restriction base="type1"> <xs:maxExclusive value="0"/> </xs:restriction> - </xs:simpleType> + </xs:simpleType> </xs:schema>""")) def test_total_digits_facet(self): @@ -902,6 +905,8 @@ def test_enumeration_facet(self): self.assertFalse(schema.types['enum2'].is_valid('four')) facet = schema.types['enum2'].get_facet(XSD_ENUMERATION) + self.assertIsInstance(facet, XsdEnumerationFacets) + elem = ElementTree.Element(XSD_ENUMERATION, value='three') facet.append(elem) self.assertTrue(schema.types['enum2'].is_valid('three')) @@ -1029,6 +1034,7 @@ def test_pattern_facet(self): </xs:schema>""")) facet = schema.types['pattern1'].get_facet(XSD_PATTERN) + self.assertIsInstance(facet, XsdPatternFacets) self.assertIsNone(facet('abc')) self.assertRaises(XMLSchemaValidationError, facet, '') self.assertRaises(XMLSchemaValidationError, facet, 'a;') @@ -1060,7 +1066,7 @@ def 
test_pattern_facet(self): <xs:restriction base="xs:string"> <xs:pattern value="]"/> </xs:restriction> - </xs:simpleType> + </xs:simpleType> </xs:schema>"""), validation='lax') self.assertEqual(len(schema.all_errors), 2) @@ -1088,6 +1094,7 @@ def test_get_annotation__issue_255(self): </xs:schema>""")) facet = schema.types['enum1'].get_facet(XSD_ENUMERATION) + self.assertIsInstance(facet, XsdEnumerationFacets) self.assertEqual(facet.annotation.documentation[0].text, '1st facet') self.assertEqual(facet.get_annotation(0).documentation[0].text, '1st facet') self.assertIsNone(facet.get_annotation(1)) @@ -1111,6 +1118,7 @@ def test_get_annotation__issue_255(self): </xs:schema>""")) facet = schema.types['pattern1'].get_facet(XSD_PATTERN) + self.assertIsInstance(facet, XsdPatternFacets) self.assertIsNone(facet.get_annotation(0)) self.assertEqual(facet.get_annotation(1).documentation[0].text, '2nd facet') @@ -1130,7 +1138,7 @@ def test_fixed_value(self): <xs:restriction base="string30"> <xs:maxLength value="20"/> </xs:restriction> - </xs:simpleType> + </xs:simpleType> </xs:schema>""")) self.assertIn("'maxLength' facet value is fixed to 30", str(ec.exception)) @@ -1242,7 +1250,7 @@ def test_assertion_facet(self): </xs:simpleType> <xs:simpleType name="string2"> <xs:restriction base="xs:string"> - <xs:assertion test="last()" + <xs:assertion test="last()" xpathDefaultNamespace="http://xpath.test/ns"/> </xs:restriction> </xs:simpleType> @@ -1251,7 +1259,6 @@ def test_assertion_facet(self): <xs:assertion test="position()"/> </xs:restriction> </xs:simpleType> - <xs:simpleType name="integer_list"> <xs:list itemType="xs:integer"/> </xs:simpleType> @@ -1263,10 +1270,12 @@ def test_assertion_facet(self): </xs:schema>""")) facet = schema.types['string1'].get_facet(XSD_ASSERTION) + self.assertIsInstance(facet, XsdAssertionFacet) self.assertIsNone(facet('')) self.assertEqual(facet.xpath_default_namespace, '') facet = schema.types['string2'].get_facet(XSD_ASSERTION) + 
self.assertIsInstance(facet, XsdAssertionFacet) self.assertEqual(facet.xpath_default_namespace, 'http://xpath.test/ns') with self.assertRaises(XMLSchemaValidationError) as ec: facet('') @@ -1278,6 +1287,7 @@ def test_assertion_facet(self): facet = schema.types['integer_vector'].get_facet(XSD_ASSERTION) self.assertIsNone(facet([1, 2, 3])) + self.assertIsInstance(facet, XsdAssertionFacet) self.assertEqual(facet.parser.variable_types, {'value': 'xs:anySimpleType'}) schema = self.schema_class(dedent("""\ diff --git a/tests/validators/test_identities.py b/tests/validators/test_identities.py index 1b559dac..0d034015 100644 --- a/tests/validators/test_identities.py +++ b/tests/validators/test_identities.py @@ -445,7 +445,7 @@ def test_keyref_reference_definition(self): <xs:keyref name="keyref1" refer="key1"> <xs:selector xpath="."/> <xs:field xpath="."/> - </xs:keyref> + </xs:keyref> </xs:element> <xs:element name="secondary_key" type="xs:string"> <xs:keyref ref="keyref1"/> diff --git a/tests/validators/test_models.py b/tests/validators/test_models.py index 3193ddbe..1b1237b1 100644 --- a/tests/validators/test_models.py +++ b/tests/validators/test_models.py @@ -620,35 +620,35 @@ def test_single_item_groups(self): <xs:choice> <xs:any maxOccurs="2" processContents="lax"/> </xs:choice> - </xs:complexType> + </xs:complexType> </xs:element> <xs:element name="a2"> <xs:complexType> <xs:choice> <xs:any maxOccurs="2" processContents="strict"/> </xs:choice> - </xs:complexType> + </xs:complexType> </xs:element> <xs:element name="a3"> <xs:complexType> <xs:sequence> <xs:any maxOccurs="2" processContents="lax"/> </xs:sequence> - </xs:complexType> + </xs:complexType> </xs:element> <xs:element name="a4"> <xs:complexType> <xs:choice> <xs:element name="b" maxOccurs="2"/> </xs:choice> - </xs:complexType> + </xs:complexType> </xs:element> <xs:element name="a5"> <xs:complexType> <xs:sequence> <xs:element name="b" maxOccurs="2"/> </xs:sequence> - </xs:complexType> + </xs:complexType> 
</xs:element> <xs:element name="b"/> </xs:schema>""") diff --git a/tests/validators/test_particles.py b/tests/validators/test_particles.py index e945d956..636f53f3 100644 --- a/tests/validators/test_particles.py +++ b/tests/validators/test_particles.py @@ -91,7 +91,7 @@ def test_has_occurs_restriction(self): <xs:element name="node8" minOccurs="3" maxOccurs="11"/> <xs:element name="node9" minOccurs="0" maxOccurs="0"/> </xs:sequence> - </xs:complexType> + </xs:complexType> </xs:schema>""") xsd_group = schema.types['barType'].content diff --git a/tests/validators/test_schemas.py b/tests/validators/test_schemas.py index 0f050609..effd9b72 100644 --- a/tests/validators/test_schemas.py +++ b/tests/validators/test_schemas.py @@ -114,7 +114,7 @@ def test_builtin_types(self): def test_resolve_qname(self): schema = self.schema_class(dedent("""\ <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" - xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> <xs:element name="root" /> </xs:schema>""")) @@ -358,7 +358,7 @@ def test_logging(self): def test_target_namespace(self): schema = self.schema_class(dedent("""\ - <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" + <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" targetNamespace="http://xmlschema.test/ns"> <xs:element name="root"/> </xs:schema>""")) @@ -372,7 +372,7 @@ def test_target_namespace(self): with self.assertRaises(XMLSchemaParseError) as ctx: self.schema_class(dedent("""\ - <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" + <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" targetNamespace=""> <xs:element name="root"/> </xs:schema>""")) @@ -382,14 +382,14 @@ def test_target_namespace(self): def test_block_default(self): schema = self.schema_class(dedent("""\ - <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" + <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" blockDefault="extension restriction "> <xs:element name="root"/> 
</xs:schema>""")) self.assertEqual(schema.block_default, 'extension restriction ') schema = self.schema_class(dedent("""\ - <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" + <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" blockDefault="#all"> <xs:element name="root"/> </xs:schema>""")) @@ -398,7 +398,7 @@ def test_block_default(self): with self.assertRaises(XMLSchemaParseError) as ctx: self.schema_class(dedent("""\ - <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" + <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" blockDefault="all">> <xs:element name="root"/> </xs:schema>""")) @@ -408,7 +408,7 @@ def test_block_default(self): with self.assertRaises(XMLSchemaParseError) as ctx: self.schema_class(dedent("""\ - <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" + <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" blockDefault="#all restriction">> <xs:element name="root"/> </xs:schema>""")) @@ -418,14 +418,14 @@ def test_block_default(self): def test_final_default(self): schema = self.schema_class(dedent("""\ - <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" + <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" finalDefault="extension restriction "> <xs:element name="root"/> </xs:schema>""")) self.assertEqual(schema.final_default, 'extension restriction ') schema = self.schema_class(dedent("""\ - <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" + <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" finalDefault="#all"> <xs:element name="root"/> </xs:schema>""")) @@ -434,7 +434,7 @@ def test_final_default(self): with self.assertRaises(XMLSchemaParseError) as ctx: self.schema_class(dedent("""\ - <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" + <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" finalDefault="all">> <xs:element name="root"/> </xs:schema>""")) @@ -473,7 +473,7 @@ def test_version_control(self): <xs:element name="root"> <xs:complexType> <xs:attribute name="a" use="required"/> - <xs:assert 
test="@a > 300" vc:minVersion="1.1" + <xs:assert test="@a > 300" vc:minVersion="1.1" xmlns:vc="http://www.w3.org/2007/XMLSchema-versioning"/> </xs:complexType> </xs:element> @@ -481,8 +481,8 @@ def test_version_control(self): self.assertEqual(len(schema.root[0][0]), 1 if schema.XSD_VERSION == '1.0' else 2) schema = self.schema_class(dedent(""" - <xs:schema vc:minVersion="1.1" elementFormDefault="qualified" - xmlns:xs="http://www.w3.org/2001/XMLSchema" + <xs:schema vc:minVersion="1.1" elementFormDefault="qualified" + xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:vc="http://www.w3.org/2007/XMLSchema-versioning"> <xs:element name="root"/> </xs:schema>""")) @@ -586,7 +586,7 @@ def test_multi_schema_initialization(self): self.assertIn("global element with name='elem2' is already defined", str(ec.exception)) source1 = dedent("""\ - <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" + <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" targetNamespace="http://xmlschema.test/ns"> <xs:element name="elem1"/> </xs:schema>""") @@ -600,7 +600,7 @@ def test_multi_schema_initialization(self): def test_add_schema(self): source1 = dedent("""\ - <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" + <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" targetNamespace="http://xmlschema.test/ns"> <xs:element name="elem1"/> </xs:schema>""") @@ -611,7 +611,7 @@ def test_add_schema(self): </xs:schema>""") source3 = dedent("""\ - <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" + <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" targetNamespace="http://xmlschema.test/ns1"> <xs:element name="elem3"/> </xs:schema>""") @@ -852,7 +852,7 @@ class CustomXMLSchema(XMLSchema11): def test_default_attributes(self): schema = self.schema_class(dedent("""\ - <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" + <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" defaultAttributes="attrs"> <xs:element name="root"/> <xs:attributeGroup name="attrs"> @@ -863,7 +863,7 @@ def 
test_default_attributes(self): with self.assertRaises(XMLSchemaParseError) as ctx: self.schema_class(dedent("""\ - <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" + <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" defaultAttributes="attrs"> <xs:element name="root"/> </xs:schema>""")) @@ -871,7 +871,7 @@ def test_default_attributes(self): with self.assertRaises(XMLSchemaParseError) as ctx: self.schema_class(dedent("""\ - <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" + <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" defaultAttributes="x:attrs"> <xs:element name="root"/> </xs:schema>""")) diff --git a/tests/validators/test_simple_types.py b/tests/validators/test_simple_types.py index 4e6fc3b4..b1a338bd 100644 --- a/tests/validators/test_simple_types.py +++ b/tests/validators/test_simple_types.py @@ -36,7 +36,7 @@ def test_simple_types(self): self.assertEqual(xs.types['test_union'].elem.tag, XSD_UNION) def test_variety_property(self): - schema = self.check_schema(""" + schema = self.check_schema(""" <xs:simpleType name="atomicType"> <xs:restriction base="xs:string"/> </xs:simpleType> @@ -156,7 +156,7 @@ def test_is_empty(self): <xs:length value="0"/> </xs:restriction> </xs:simpleType> - + <xs:simpleType name="emptyType3"> <xs:restriction base="xs:string"> <xs:enumeration value=""/> diff --git a/tests/validators/test_wildcards.py b/tests/validators/test_wildcards.py index 538a025b..26e9a5cd 100644 --- a/tests/validators/test_wildcards.py +++ b/tests/validators/test_wildcards.py @@ -767,7 +767,7 @@ def test_any_wildcard(self): <xs:complexType name="taggedType"> <xs:sequence> <xs:element name="tag" type="xs:string"/> - <xs:any namespace="##targetNamespace" + <xs:any namespace="##targetNamespace" notQName="##defined tns1:foo ##definedSibling"/> </xs:sequence> </xs:complexType> diff --git a/tests/validators/test_xsdbase.py b/tests/validators/test_xsdbase.py index 1c60aac2..161b0496 100644 --- a/tests/validators/test_xsdbase.py +++ 
b/tests/validators/test_xsdbase.py @@ -193,7 +193,7 @@ def test_representation(self): <xs:simpleContent> <xs:extension base="xs:string"> <xs:attribute ref="slot"/> - </xs:extension> + </xs:extension> </xs:simpleContent> </xs:complexType> </xs:element> @@ -320,13 +320,13 @@ def test_parse_target_namespace(self): <xs:complexContent> <xs:restriction base="type0"> <xs:sequence> - <xs:element name="elem1" targetNamespace="http://xmlschema.test/ns" + <xs:element name="elem1" targetNamespace="http://xmlschema.test/ns" type="xs:integer"/> </xs:sequence> </xs:restriction> - </xs:complexContent> + </xs:complexContent> </xs:complexType> - <xs:element name="root" type="type1"/> + <xs:element name="root" type="type1"/> </xs:schema>""") self.assertEqual(schema.elements['root'].type.content[0].target_namespace, 'http://xmlschema.test/ns') @@ -335,7 +335,7 @@ def test_parse_target_namespace(self): <xs:element name="root"> <xs:complexType> <xs:sequence> - <xs:element name="node" targetNamespace=""/> + <xs:element name="node" targetNamespace=""/> </xs:sequence> </xs:complexType> </xs:element> @@ -510,7 +510,7 @@ class TestXsdType(unittest.TestCase): def setUpClass(cls): cls.schema = XMLSchema10(dedent("""\ <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> - + <xs:simpleType name="emptyType"> <xs:restriction base="xs:string"> <xs:length value="0"/> @@ -519,7 +519,7 @@ def setUpClass(cls): <xs:complexType name="emptyType2"> <xs:attribute name="foo" type="xs:string"/> - </xs:complexType> + </xs:complexType> <xs:simpleType name="idType"> <xs:restriction base="xs:ID"/> @@ -532,7 +532,7 @@ def setUpClass(cls): <xs:simpleType name="dateTimeType"> <xs:restriction base="xs:dateTime"/> </xs:simpleType> - + <xs:simpleType name="fooType"> <xs:restriction base="xs:string"/> </xs:simpleType> @@ -544,12 +544,12 @@ def setUpClass(cls): <xs:simpleType name="fooUnionType"> <xs:union memberTypes="xs:string xs:anyURI"/> </xs:simpleType> - + <xs:complexType name="barType"> <xs:sequence> 
<xs:element name="node"/> </xs:sequence> - </xs:complexType> + </xs:complexType> <xs:complexType name="barExtType"> <xs:complexContent> @@ -559,7 +559,7 @@ def setUpClass(cls): </xs:sequence> </xs:extension> </xs:complexContent> - </xs:complexType> + </xs:complexType> <xs:complexType name="barResType"> <xs:complexContent> @@ -569,17 +569,17 @@ def setUpClass(cls): </xs:sequence> </xs:restriction> </xs:complexContent> - </xs:complexType> + </xs:complexType> <xs:complexType name="mixedType" mixed="true"> <xs:sequence> <xs:element name="node" type="xs:string"/> </xs:sequence> - </xs:complexType> + </xs:complexType> <xs:element name="fooElem" type="fooType"/> <xs:element name="barElem" type="barType" block="extension"/> - + </xs:schema>""")) def test_content_type_label(self): diff --git a/tox.ini b/tox.ini index 5f6a32f7..76de4c00 100644 --- a/tox.ini +++ b/tox.ini @@ -12,7 +12,6 @@ deps = py{39,310}: memory_profiler docs: Sphinx docs: sphinx_rtd_theme - flake8: flake8 coverage: coverage commands = python -m unittest @@ -37,8 +36,11 @@ commands = max-line-length = 100 [testenv:flake8] +deps = + flake8 commands = flake8 xmlschema + flake8 tests [testenv:mypy-py37] deps = diff --git a/xmlschema/extras/templates/python/bindings.py.jinja b/xmlschema/extras/templates/python/bindings.py.jinja index 0b47a05c..c1e2a376 100644 --- a/xmlschema/extras/templates/python/bindings.py.jinja +++ b/xmlschema/extras/templates/python/bindings.py.jinja @@ -21,11 +21,11 @@ __NAMESPACE__ = "{{ schema.target_namespace }}" schema = xmlschema.XMLSchema10("{{ schema.name }}") {%- else -%} schema = xmlschema.XMLSchema11("{{ schema.name }}") -{%- endif %} +{%- endif -%} {# Bindings for global elements #} -{%- for xsd_element in schema.elements.values() %} +{% for xsd_element in schema.elements.values() %} + class {{ xsd_element|name|capitalize }}Binding(DataElement, metaclass=DataBindingMeta): xsd_element = schema.elements['{{ xsd_element.local_name }}'] - -{% endfor %} \ No newline at end of file 
+{% endfor %} From a54cd729d20b7c479e409dfa894c153459d5e138 Mon Sep 17 00:00:00 2001 From: Davide Brunato <brunato@sissa.it> Date: Sun, 17 Jul 2022 08:23:58 +0200 Subject: [PATCH 15/17] Use attrgetter for sorting elements by name --- xmlschema/validators/schemas.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/xmlschema/validators/schemas.py b/xmlschema/validators/schemas.py index caf0b312..69e9b6c3 100644 --- a/xmlschema/validators/schemas.py +++ b/xmlschema/validators/schemas.py @@ -14,13 +14,7 @@ XMLSchema11 for XSD 1.1. The latter class parses also XSD 1.0 schemas, as prescribed by the standard. """ -import sys - -if sys.version_info < (3, 7): - from typing import GenericMeta as ABCMeta -else: - from abc import ABCMeta - +from abc import ABCMeta import os import logging import threading @@ -29,6 +23,7 @@ import sys from copy import copy from itertools import chain +from operator import attrgetter from typing import cast, Callable, ItemsView, List, Optional, Dict, Any, \ Set, Union, Tuple, Type, Iterator, Counter from xml.etree.ElementTree import Element, ParseError @@ -77,6 +72,8 @@ logger = logging.getLogger('xmlschema') +name_attribute = attrgetter('name') + XSD_VERSION_PATTERN = re.compile(r'^\d+\.\d+$') DRIVE_PATTERN = re.compile(r'^[a-zA-Z]:$') @@ -561,10 +558,10 @@ def __setattr__(self, name: str, value: Any) -> None: super(XMLSchemaBase, self).__setattr__(name, value) def __iter__(self) -> Iterator[XsdElement]: - yield from sorted(self.elements.values(), key=lambda x: x.name) + yield from sorted(self.elements.values(), key=name_attribute) def __reversed__(self) -> Iterator[XsdElement]: - yield from sorted(self.elements.values(), key=lambda x: x.name, reverse=True) + yield from sorted(self.elements.values(), key=name_attribute, reverse=True) def __len__(self) -> int: return len(self.elements) From 1d65d841c4998e2b4b605f2c3763253aeb7131a9 Mon Sep 17 00:00:00 2001 From: Davide Brunato <brunato@sissa.it> Date: Mon, 18 Jul 
2022 14:08:04 +0200 Subject: [PATCH 16/17] Extend name matching for DataElement.get() and DataElement.set() - Now prefixed name are converted to extended form using the nsmap of DataElement instance - Improvement to simplify access to attributes (issue #314) --- tests/test_dataobjects.py | 20 ++++++++++++++++++++ xmlschema/dataobjects.py | 19 ++++++++++++++++++- 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/tests/test_dataobjects.py b/tests/test_dataobjects.py index 6aa8f4d6..294ef10d 100644 --- a/tests/test_dataobjects.py +++ b/tests/test_dataobjects.py @@ -48,6 +48,26 @@ def test_namespaces(self): nsmap = {'tns': 'http://xmlschema.test/ns'} self.assertEqual(DataElement('foo', nsmap=nsmap).nsmap, nsmap) + def test_attributes_with_namespaces(self): + nsmap = {'tns': 'http://xmlschema.test/ns'} + attrib = {'a': 10, '{http://xmlschema.test/ns}b': 'bar'} + element = DataElement('foo', attrib=attrib, nsmap=nsmap) + + self.assertEqual(element.get('{http://xmlschema.test/ns}b'), 'bar') + self.assertEqual(element.get('tns:b'), 'bar') + self.assertIsNone(element.get('tns:c')) + + with self.assertRaises(ValueError) as ctx: + element.get('tns:b:c') + self.assertIn("'tns:b:c' has a wrong format", str(ctx.exception)) + + with self.assertRaises(KeyError) as ctx: + element.get('tns0:b') + self.assertIn("prefix 'tns0' not found ", str(ctx.exception)) + + self.assertIsNone(element.set('tns:c', 8)) + self.assertEqual(element.get('tns:c'), 8) + def test_text_value(self): self.assertIsNone(DataElement('foo').text) self.assertEqual(DataElement('foo', value=True).text, 'true') diff --git a/xmlschema/dataobjects.py b/xmlschema/dataobjects.py index 692e8fa2..2329b4cf 100644 --- a/xmlschema/dataobjects.py +++ b/xmlschema/dataobjects.py @@ -14,7 +14,8 @@ from elementpath import XPathContext, XPath2Parser, build_node_tree, protocols from elementpath.etree import etree_tostring -from .exceptions import XMLSchemaAttributeError, XMLSchemaTypeError, XMLSchemaValueError +from 
.exceptions import XMLSchemaAttributeError, XMLSchemaKeyError, XMLSchemaTypeError, \ + XMLSchemaValueError from .aliases import ElementType, XMLSourceType, NamespacesType, BaseXsdType, DecodeType from .helpers import get_namespace, get_prefixed_qname, local_name, raw_xml_encode from .converters import ElementData, XMLSchemaConverter @@ -136,12 +137,28 @@ def text(self) -> Optional[str]: """The string value of the data element.""" return raw_xml_encode(self.value) + def _map_attribute_prefixed_name(self, name: str) -> str: + try: + prefix, _local_name = name.split(':') + except ValueError: + raise XMLSchemaValueError(f'{name!r} has a wrong format') from None + else: + try: + return '{%s}%s' % (self.nsmap[prefix], _local_name) + except KeyError: + msg = f'prefix {prefix!r} not found in {self} nsmap' + raise XMLSchemaKeyError(msg) from None + def get(self, key: str, default: Any = None) -> Any: """Gets a data element attribute.""" + if not key.startswith('{') and ':' in key: + key = self._map_attribute_prefixed_name(key) return self.attrib.get(key, default) def set(self, key: str, value: Any) -> None: """Sets a data element attribute.""" + if not key.startswith('{') and ':' in key: + key = self._map_attribute_prefixed_name(key) self.attrib[key] = value @property From 427452ed18c43576b9d7b96dc26678bdada57eea Mon Sep 17 00:00:00 2001 From: Davide Brunato <brunato@sissa.it> Date: Mon, 18 Jul 2022 15:45:01 +0200 Subject: [PATCH 17/17] Update release info and requirements --- .github/workflows/test-xmlschema.yml | 4 ++-- CHANGELOG.rst | 7 +++++++ publiccode.yml | 2 +- requirements-dev.txt | 2 +- setup.py | 2 +- tox.ini | 2 +- 6 files changed, 13 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test-xmlschema.yml b/.github/workflows/test-xmlschema.yml index 82b32c9a..bce51a93 100644 --- a/.github/workflows/test-xmlschema.yml +++ b/.github/workflows/test-xmlschema.yml @@ -49,10 +49,10 @@ jobs: - name: Lint with mypy if Python version != 3.7 if: ${{ 
matrix.python-version != '3.7' }} run: | - pip install mypy==0.950 elementpath==2.5.1 lxml-stubs + pip install mypy==0.950 elementpath==3.0.0 lxml-stubs mypy --show-error-codes --strict xmlschema - name: Lint with mypy if Python version == 3.7 if: ${{ matrix.python-version == '3.7' }} run: | - pip install mypy==0.961 elementpath==2.5.3 lxml-stubs + pip install mypy==0.961 elementpath==3.0.0 lxml-stubs mypy --show-error-codes --no-warn-redundant-casts --no-warn-unused-ignores --strict xmlschema diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 5ba56bd2..62a30942 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,12 @@ CHANGELOG ********* +`v2.0.0`_ (2022-07-18) +====================== +* Refactor XPath interface for the full XPath node implementation of elementpath v3.0 +* Fix BadgerFishConverter with mixed content (issue #315) +* Improve get() and set() of DataElement (issue #314) + `v1.11.3`_ (2022-06-24) ======================= * Fix invalid element not detected with empty particle (issue #306) @@ -543,3 +549,4 @@ v0.9.6 (2017-05-05) .. _v1.11.1: https://github.com/brunato/xmlschema/compare/v1.11.0...v1.11.1 .. _v1.11.2: https://github.com/brunato/xmlschema/compare/v1.11.1...v1.11.2 .. _v1.11.3: https://github.com/brunato/xmlschema/compare/v1.11.2...v1.11.3 +.. 
_v2.0.0: https://github.com/brunato/xmlschema/compare/v1.11.3...v2.0.0 diff --git a/publiccode.yml b/publiccode.yml index 9586f1ec..0f040f1a 100644 --- a/publiccode.yml +++ b/publiccode.yml @@ -6,7 +6,7 @@ publiccodeYmlVersion: '0.2' name: xmlschema url: 'https://github.com/sissaschool/xmlschema' landingURL: 'https://github.com/sissaschool/xmlschema' -releaseDate: '2022-XX-XX' +releaseDate: '2022-07-18' softwareVersion: v2.0.0 developmentStatus: stable platforms: diff --git a/requirements-dev.txt b/requirements-dev.txt index 792729f1..88dbc1b8 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -2,7 +2,7 @@ setuptools tox coverage --e ../elementpath # elementpath>=3.0.0, <4.0.0 +elementpath>=3.0.0, <4.0.0 lxml jinja2 memory_profiler diff --git a/setup.py b/setup.py index 272018b5..d5e6017f 100755 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ ] }, python_requires='>=3.7', - install_requires=['elementpath>=2.0.0, <4.0.0'], + install_requires=['elementpath>=3.0.0, <4.0.0'], extras_require={ 'codegen': ['elementpath>=3.0.0, <4.0.0', 'jinja2'], 'dev': ['tox', 'coverage', 'lxml', 'elementpath>=3.0.0, <4.0.0', diff --git a/tox.ini b/tox.ini index 76de4c00..96b33800 100644 --- a/tox.ini +++ b/tox.ini @@ -71,7 +71,7 @@ commands = deps = pytest pytest-randomly - elementpath~=3.0.0 + elementpath>=3.0.0, <4.0.0 lxml jinja2 mypy==0.961