Skip to content

Commit

Permalink
Redefine XPath parsers usage on schema instances
Browse files Browse the repository at this point in the history
  - ElementPathMixin does not need to be bound to a schema proxy
  - Keep _xpath_lock only for assertions on complex types
  - Add a threading lock to all schemas (not only meta-schemas),
    used while building the XPath constructor functions
  • Loading branch information
brunato committed Mar 14, 2021
1 parent e0e4ca4 commit 506939e
Show file tree
Hide file tree
Showing 7 changed files with 57 additions and 66 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@
CHANGELOG
*********

`v1.5.3`_ (2021-03-14)
======================
* Remove unnecessary bindings to the schema proxy from ElementPathMixin
to avoid conflicts when the schema is used by an XPath 3 parser
* Fix schema logger (issue #228)

`v1.5.2`_ (2021-03-04)
======================
* Improve empty content checking
Expand Down Expand Up @@ -416,3 +422,4 @@ v0.9.6 (2017-05-05)
.. _v1.5.0: https://github.com/brunato/xmlschema/compare/v1.4.2...v1.5.0
.. _v1.5.1: https://github.com/brunato/xmlschema/compare/v1.5.0...v1.5.1
.. _v1.5.2: https://github.com/brunato/xmlschema/compare/v1.5.1...v1.5.2
.. _v1.5.3: https://github.com/brunato/xmlschema/compare/v1.5.2...v1.5.3
4 changes: 2 additions & 2 deletions publiccode.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ publiccodeYmlVersion: '0.2'
name: xmlschema
url: 'https://github.com/sissaschool/xmlschema'
landingURL: 'https://github.com/sissaschool/xmlschema'
releaseDate: '2021-03-04'
softwareVersion: v1.5.2
releaseDate: '2021-03-14'
softwareVersion: v1.5.3
developmentStatus: stable
platforms:
- linux
Expand Down
9 changes: 3 additions & 6 deletions xmlschema/validators/assertions.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,9 @@ class XsdAssert(XsdComponent, ElementPathMixin):
path = 'true()'

def __init__(self, elem, schema, parent, base_type):
self._xpath_lock = threading.Lock()
self.base_type = base_type
self._assert_xpath_lock = threading.Lock() # Lock for assertion XPath operations
super(XsdAssert, self).__init__(elem, schema, parent)
ElementPathMixin.__init__(self)

def __repr__(self):
if len(self.path) < 40:
Expand All @@ -48,16 +47,12 @@ def __repr__(self):

def __getstate__(self):
state = self.__dict__.copy()
state.pop('_assert_xpath_lock', None)
state.pop('_xpath_lock', None)
state.pop('_xpath_parser', None)
state.pop('xpath_tokens', None) # For schema objects
return state

def __setstate__(self, state):
self.__dict__.update(state)
self._xpath_lock = threading.Lock()
self._assert_xpath_lock = threading.Lock()

def _parse(self):
super(XsdAssert, self)._parse()
Expand All @@ -79,6 +74,8 @@ def built(self):
return self.token is not None and (self.base_type.parent is None or self.base_type.built)

def build(self):
# An assertion requires a schema-bound parser because the selection
# runs on XML elements and uses values decoded by their XSD types
self.parser = XPath2Parser(
namespaces=self.namespaces,
variable_types={'value': self.base_type.sequence_type},
Expand Down
4 changes: 0 additions & 4 deletions xmlschema/validators/elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,6 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin)

binding = None

def __init__(self, elem, schema, parent):
super(XsdElement, self).__init__(elem, schema, parent)
ElementPathMixin.__init__(self)

def __repr__(self):
return '%s(%s=%r, occurs=%r)' % (
self.__class__.__name__,
Expand Down
22 changes: 16 additions & 6 deletions xmlschema/validators/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,6 @@ def get_attribute(attr, *args):
# Defining a subclass without a meta-schema (eg. XMLSchemaBase)
return super(XMLSchemaMeta, mcs).__new__(mcs, name, bases, dict_)
dict_['meta_schema'] = None
dict_['lock'] = threading.Lock() # Lock instance for shared meta-schemas

xsd_version = dict_.get('XSD_VERSION') or get_attribute('XSD_VERSION', *bases)
if xsd_version not in ('1.0', '1.1'):
Expand Down Expand Up @@ -130,7 +129,6 @@ def get_attribute(attr, *args):
schema_location = meta_schema.url if isinstance(meta_schema, XMLSchemaBase) else meta_schema
meta_schema = meta_schema_class.create_meta_schema(schema_location)
dict_['meta_schema'] = meta_schema
dict_.pop('lock')

return super(XMLSchemaMeta, mcs).__new__(mcs, name, bases, dict_)

Expand Down Expand Up @@ -285,13 +283,15 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin, metaclass=X
default_attributes = None
default_open_content = None
override = None

# Store XPath constructor tokens (for the schema and its assertions)
xpath_tokens = None

def __init__(self, source, namespace=None, validation='strict', global_maps=None,
converter=None, locations=None, base_url=None, allow='all', defuse='remote',
timeout=300, build=True, use_meta=True, use_fallback=True, loglevel=None):
super(XMLSchemaBase, self).__init__(validation)
ElementPathMixin.__init__(self)
self.lock = threading.Lock() # Lock for build operations

if loglevel is not None:
if isinstance(loglevel, str):
Expand Down Expand Up @@ -456,9 +456,15 @@ def __init__(self, source, namespace=None, validation='strict', global_maps=None
if loglevel is not None:
logger.setLevel(logging.WARNING) # Restore default logging

@property
def name(self):
return os.path.basename(self.url) if self.url else None
def __getstate__(self):
state = self.__dict__.copy()
state.pop('lock', None)
state.pop('xpath_tokens', None)
return state

def __setstate__(self, state):
self.__dict__.update(state)
self.lock = threading.Lock()

def __repr__(self):
if self.url:
Expand Down Expand Up @@ -500,6 +506,10 @@ def __reversed__(self):
def __len__(self):
return len(self.elements)

@property
def name(self):
return os.path.basename(self.url) if self.url else None

@property
def xpath_proxy(self):
return XMLSchemaProxy(self)
Expand Down
5 changes: 2 additions & 3 deletions xmlschema/validators/wildcards.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,9 +371,8 @@ class XsdAnyElement(XsdWildcard, ParticleMixin, ElementPathMixin):
_ADMITTED_TAGS = {XSD_ANY}
precedences = ()

def __init__(self, elem, schema, parent, maps=None):
super(XsdAnyElement, self).__init__(elem, schema, parent, maps)
ElementPathMixin.__init__(self)
def __init__(self, elem, schema, parent):
super(XsdAnyElement, self).__init__(elem, schema, parent)

def __repr__(self):
if self.namespace:
Expand Down
72 changes: 27 additions & 45 deletions xmlschema/xpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from abc import abstractmethod
from collections.abc import Sequence
import re
import threading

from elementpath import AttributeNode, TypedElement, XPath2Parser, \
XPathSchemaContext, AbstractSchemaProxy
Expand Down Expand Up @@ -93,17 +92,17 @@ def xsd_version(self):
return self._schema.XSD_VERSION

def bind_parser(self, parser):
if parser.schema is not self:
parser.schema = self

if self._schema.xpath_tokens is None:
parser.symbol_table = parser.__class__.symbol_table.copy()
for xsd_type in self.iter_atomic_types():
parser.schema_constructor(xsd_type.name)
self._schema.xpath_tokens = parser.symbol_table
else:
parser.symbol_table = self._schema.xpath_tokens
parser.tokenizer = parser.create_tokenizer(parser.symbol_table)
parser.schema = self
parser.symbol_table = parser.__class__.symbol_table.copy()

with self._schema.lock:
if self._schema.xpath_tokens is None:
self._schema.xpath_tokens = {
xsd_type.name: parser.schema_constructor(xsd_type.name)
for xsd_type in self.iter_atomic_types()
}

parser.symbol_table.update(self._schema.xpath_tokens)

def get_context(self):
return XMLSchemaContext(
Expand Down Expand Up @@ -164,7 +163,7 @@ def get_primitive_type(self, xsd_type):

class ElementPathMixin(Sequence):
"""
Mixin abstract class for enabling ElementTree and XPath API on XSD components.
Mixin abstract class for enabling ElementTree and XPath 2.0 API on XSD components.
:cvar text: the Element text, for compatibility with the ElementTree API.
:cvar tail: the Element tail, for compatibility with the ElementTree API.
Expand All @@ -175,22 +174,6 @@ class ElementPathMixin(Sequence):
namespaces = {}
xpath_default_namespace = ''

_xpath_parser = None # Internal XPath 2.0 parser, instantiated at first use.

def __init__(self):
self._xpath_lock = threading.Lock() # Lock for XPath operations

def __getstate__(self):
state = self.__dict__.copy()
state.pop('_xpath_lock', None)
state.pop('_xpath_parser', None)
state.pop('xpath_tokens', None) # For schema objects
return state

def __setstate__(self, state):
self.__dict__.update(state)
self._xpath_lock = threading.Lock()

@abstractmethod
def __iter__(self):
raise NotImplementedError
Expand Down Expand Up @@ -243,19 +226,6 @@ def _get_xpath_namespaces(self, namespaces=None):
xpath_namespaces.update(namespaces)
return xpath_namespaces

def _xpath_parse(self, path, namespaces=None):
path = _REGEX_TAG_POSITION.sub('', path.strip()) # Strips tags positions from path

namespaces = self._get_xpath_namespaces(namespaces)
with self._xpath_lock:
parser = self._xpath_parser
if parser is None:
parser = XPath2Parser(namespaces, strict=False, schema=self.xpath_proxy)
self._xpath_parser = parser
else:
parser.namespaces = namespaces
return parser.parse(path)

def find(self, path, namespaces=None):
"""
Finds the first XSD subelement matching the path.
Expand All @@ -264,8 +234,12 @@ def find(self, path, namespaces=None):
:param namespaces: an optional mapping from namespace prefix to namespace URI.
:return: the first matching XSD subelement or ``None`` if there is no match.
"""
path = _REGEX_TAG_POSITION.sub('', path.strip()) # Strips tags positions from path
namespaces = self._get_xpath_namespaces(namespaces)
parser = XPath2Parser(namespaces, strict=False)
context = XMLSchemaContext(self)
return next(self._xpath_parse(path, namespaces).select_results(context), None)

return next(parser.parse(path).select_results(context), None)

def findall(self, path, namespaces=None):
"""
Expand All @@ -276,8 +250,12 @@ def findall(self, path, namespaces=None):
:return: a list containing all matching XSD subelements in document order, an empty \
list is returned if there is no match.
"""
path = _REGEX_TAG_POSITION.sub('', path.strip()) # Strips tags positions from path
namespaces = self._get_xpath_namespaces(namespaces)
parser = XPath2Parser(namespaces, strict=False)
context = XMLSchemaContext(self)
return self._xpath_parse(path, namespaces).get_results(context)

return parser.parse(path).get_results(context)

def iterfind(self, path, namespaces=None):
"""
Expand All @@ -287,8 +265,12 @@ def iterfind(self, path, namespaces=None):
:param namespaces: is an optional mapping from namespace prefix to full name.
:return: an iterable yielding all matching XSD subelements in document order.
"""
path = _REGEX_TAG_POSITION.sub('', path.strip()) # Strips tags positions from path
namespaces = self._get_xpath_namespaces(namespaces)
parser = XPath2Parser(namespaces, strict=False)
context = XMLSchemaContext(self)
return self._xpath_parse(path, namespaces).select_results(context)

return parser.parse(path).select_results(context)

def iter(self, tag=None):
"""
Expand Down

0 comments on commit 506939e

Please sign in to comment.