Redefine XPath parsers usage on schema instances

- ElementPathMixin does not need binding with schema proxy
- Leave _xpath_lock only for assertions on complex types
- Add threading lock for schemas (not only for meta-schemas), used during the building of XPath constructor functions
sissaschool · Mar 14, 2021 · 506939e · 506939e
1 parent e0e4ca4
commit 506939e
Show file tree

Hide file tree

Showing 7 changed files with 57 additions and 66 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -2,6 +2,12 @@
 CHANGELOG
 *********
 
+`v1.5.3`_ (2021-03-14)
+======================
+* Remove unnecessary bindings with schema proxy from ElementPathMixin
+  to avoid conflicts when schema is used by an XPath 3 parser
+* Fix schema logger (issue #228)
+
 `v1.5.2`_ (2021-03-04)
 ======================
 * Improve empty content checking
@@ -416,3 +422,4 @@ v0.9.6 (2017-05-05)
 .. _v1.5.0: https://github.com/brunato/xmlschema/compare/v1.4.2...v1.5.0
 .. _v1.5.1: https://github.com/brunato/xmlschema/compare/v1.5.0...v1.5.1
 .. _v1.5.2: https://github.com/brunato/xmlschema/compare/v1.5.1...v1.5.2
+.. _v1.5.3: https://github.com/brunato/xmlschema/compare/v1.5.2...v1.5.3
diff --git a/publiccode.yml b/publiccode.yml
@@ -6,8 +6,8 @@ publiccodeYmlVersion: '0.2'
 name: xmlschema
 url: 'https://github.com/sissaschool/xmlschema'
 landingURL: 'https://github.com/sissaschool/xmlschema'
-releaseDate: '2021-03-04'
-softwareVersion: v1.5.2
+releaseDate: '2021-03-14'
+softwareVersion: v1.5.3
 developmentStatus: stable
 platforms:
   - linux

diff --git a/xmlschema/validators/assertions.py b/xmlschema/validators/assertions.py
@@ -35,10 +35,9 @@ class XsdAssert(XsdComponent, ElementPathMixin):
     path = 'true()'
 
     def __init__(self, elem, schema, parent, base_type):
+        self._xpath_lock = threading.Lock()
         self.base_type = base_type
-        self._assert_xpath_lock = threading.Lock()  # Lock for assertion XPath operations
         super(XsdAssert, self).__init__(elem, schema, parent)
-        ElementPathMixin.__init__(self)
 
     def __repr__(self):
         if len(self.path) < 40:
@@ -48,16 +47,12 @@ def __repr__(self):
 
     def __getstate__(self):
         state = self.__dict__.copy()
-        state.pop('_assert_xpath_lock', None)
         state.pop('_xpath_lock', None)
-        state.pop('_xpath_parser', None)
-        state.pop('xpath_tokens', None)  # For schema objects
         return state
 
     def __setstate__(self, state):
         self.__dict__.update(state)
         self._xpath_lock = threading.Lock()
-        self._assert_xpath_lock = threading.Lock()
 
     def _parse(self):
         super(XsdAssert, self)._parse()
@@ -79,6 +74,8 @@ def built(self):
         return self.token is not None and (self.base_type.parent is None or self.base_type.built)
 
     def build(self):
+        # Assert requires a schema-bound parser because the selection
+        # operates on XML elements, using values decoded by their XSD types
         self.parser = XPath2Parser(
             namespaces=self.namespaces,
             variable_types={'value': self.base_type.sequence_type},

diff --git a/xmlschema/validators/elements.py b/xmlschema/validators/elements.py
@@ -81,10 +81,6 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin)
 
     binding = None
 
-    def __init__(self, elem, schema, parent):
-        super(XsdElement, self).__init__(elem, schema, parent)
-        ElementPathMixin.__init__(self)
-
     def __repr__(self):
         return '%s(%s=%r, occurs=%r)' % (
             self.__class__.__name__,

diff --git a/xmlschema/validators/schema.py b/xmlschema/validators/schema.py
@@ -93,7 +93,6 @@ def get_attribute(attr, *args):
             # Defining a subclass without a meta-schema (eg. XMLSchemaBase)
             return super(XMLSchemaMeta, mcs).__new__(mcs, name, bases, dict_)
         dict_['meta_schema'] = None
-        dict_['lock'] = threading.Lock()  # Lock instance for shared meta-schemas
 
         xsd_version = dict_.get('XSD_VERSION') or get_attribute('XSD_VERSION', *bases)
         if xsd_version not in ('1.0', '1.1'):
@@ -130,7 +129,6 @@ def get_attribute(attr, *args):
         schema_location = meta_schema.url if isinstance(meta_schema, XMLSchemaBase) else meta_schema
         meta_schema = meta_schema_class.create_meta_schema(schema_location)
         dict_['meta_schema'] = meta_schema
-        dict_.pop('lock')
 
         return super(XMLSchemaMeta, mcs).__new__(mcs, name, bases, dict_)
 
@@ -285,13 +283,15 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin, metaclass=X
     default_attributes = None
     default_open_content = None
     override = None
+
+    # Store XPath constructor tokens (for the schema and its assertions)
     xpath_tokens = None
 
     def __init__(self, source, namespace=None, validation='strict', global_maps=None,
                  converter=None, locations=None, base_url=None, allow='all', defuse='remote',
                  timeout=300, build=True, use_meta=True, use_fallback=True, loglevel=None):
         super(XMLSchemaBase, self).__init__(validation)
-        ElementPathMixin.__init__(self)
+        self.lock = threading.Lock()  # Lock for build operations
 
         if loglevel is not None:
             if isinstance(loglevel, str):
@@ -456,9 +456,15 @@ def __init__(self, source, namespace=None, validation='strict', global_maps=None
             if loglevel is not None:
                 logger.setLevel(logging.WARNING)  # Restore default logging
 
-    @property
-    def name(self):
-        return os.path.basename(self.url) if self.url else None
+    def __getstate__(self):
+        state = self.__dict__.copy()
+        state.pop('lock', None)
+        state.pop('xpath_tokens', None)
+        return state
+
+    def __setstate__(self, state):
+        self.__dict__.update(state)
+        self.lock = threading.Lock()
 
     def __repr__(self):
         if self.url:
@@ -500,6 +506,10 @@ def __reversed__(self):
     def __len__(self):
         return len(self.elements)
 
+    @property
+    def name(self):
+        return os.path.basename(self.url) if self.url else None
+
     @property
     def xpath_proxy(self):
         return XMLSchemaProxy(self)

diff --git a/xmlschema/validators/wildcards.py b/xmlschema/validators/wildcards.py
@@ -371,9 +371,8 @@ class XsdAnyElement(XsdWildcard, ParticleMixin, ElementPathMixin):
     _ADMITTED_TAGS = {XSD_ANY}
     precedences = ()
 
-    def __init__(self, elem, schema, parent, maps=None):
-        super(XsdAnyElement, self).__init__(elem, schema, parent, maps)
-        ElementPathMixin.__init__(self)
+    def __init__(self, elem, schema, parent):
+        super(XsdAnyElement, self).__init__(elem, schema, parent)
 
     def __repr__(self):
         if self.namespace:

diff --git a/xmlschema/xpath.py b/xmlschema/xpath.py
@@ -13,7 +13,6 @@
 from abc import abstractmethod
 from collections.abc import Sequence
 import re
-import threading
 
 from elementpath import AttributeNode, TypedElement, XPath2Parser, \
     XPathSchemaContext, AbstractSchemaProxy
@@ -93,17 +92,17 @@ def xsd_version(self):
         return self._schema.XSD_VERSION
 
     def bind_parser(self, parser):
-        if parser.schema is not self:
-            parser.schema = self
-
-        if self._schema.xpath_tokens is None:
-            parser.symbol_table = parser.__class__.symbol_table.copy()
-            for xsd_type in self.iter_atomic_types():
-                parser.schema_constructor(xsd_type.name)
-            self._schema.xpath_tokens = parser.symbol_table
-        else:
-            parser.symbol_table = self._schema.xpath_tokens
-        parser.tokenizer = parser.create_tokenizer(parser.symbol_table)
+        parser.schema = self
+        parser.symbol_table = parser.__class__.symbol_table.copy()
+
+        with self._schema.lock:
+            if self._schema.xpath_tokens is None:
+                self._schema.xpath_tokens = {
+                    xsd_type.name: parser.schema_constructor(xsd_type.name)
+                    for xsd_type in self.iter_atomic_types()
+                }
+
+        parser.symbol_table.update(self._schema.xpath_tokens)
 
     def get_context(self):
         return XMLSchemaContext(
@@ -164,7 +163,7 @@ def get_primitive_type(self, xsd_type):
 
 class ElementPathMixin(Sequence):
     """
-    Mixin abstract class for enabling ElementTree and XPath API on XSD components.
+    Mixin abstract class for enabling ElementTree and XPath 2.0 API on XSD components.
 
     :cvar text: the Element text, for compatibility with the ElementTree API.
     :cvar tail: the Element tail, for compatibility with the ElementTree API.
@@ -175,22 +174,6 @@ class ElementPathMixin(Sequence):
     namespaces = {}
     xpath_default_namespace = ''
 
-    _xpath_parser = None  # Internal XPath 2.0 parser, instantiated at first use.
-
-    def __init__(self):
-        self._xpath_lock = threading.Lock()  # Lock for XPath operations
-
-    def __getstate__(self):
-        state = self.__dict__.copy()
-        state.pop('_xpath_lock', None)
-        state.pop('_xpath_parser', None)
-        state.pop('xpath_tokens', None)  # For schema objects
-        return state
-
-    def __setstate__(self, state):
-        self.__dict__.update(state)
-        self._xpath_lock = threading.Lock()
-
     @abstractmethod
     def __iter__(self):
         raise NotImplementedError
@@ -243,19 +226,6 @@ def _get_xpath_namespaces(self, namespaces=None):
         xpath_namespaces.update(namespaces)
         return xpath_namespaces
 
-    def _xpath_parse(self, path, namespaces=None):
-        path = _REGEX_TAG_POSITION.sub('', path.strip())  # Strips tags positions from path
-
-        namespaces = self._get_xpath_namespaces(namespaces)
-        with self._xpath_lock:
-            parser = self._xpath_parser
-            if parser is None:
-                parser = XPath2Parser(namespaces, strict=False, schema=self.xpath_proxy)
-                self._xpath_parser = parser
-            else:
-                parser.namespaces = namespaces
-            return parser.parse(path)
-
     def find(self, path, namespaces=None):
         """
         Finds the first XSD subelement matching the path.
@@ -264,8 +234,12 @@ def find(self, path, namespaces=None):
         :param namespaces: an optional mapping from namespace prefix to namespace URI.
         :return: the first matching XSD subelement or ``None`` if there is no match.
         """
+        path = _REGEX_TAG_POSITION.sub('', path.strip())  # Strips tags positions from path
+        namespaces = self._get_xpath_namespaces(namespaces)
+        parser = XPath2Parser(namespaces, strict=False)
         context = XMLSchemaContext(self)
-        return next(self._xpath_parse(path, namespaces).select_results(context), None)
+
+        return next(parser.parse(path).select_results(context), None)
 
     def findall(self, path, namespaces=None):
         """
@@ -276,8 +250,12 @@ def findall(self, path, namespaces=None):
         :return: a list containing all matching XSD subelements in document order, an empty \
         list is returned if there is no match.
         """
+        path = _REGEX_TAG_POSITION.sub('', path.strip())  # Strips tags positions from path
+        namespaces = self._get_xpath_namespaces(namespaces)
+        parser = XPath2Parser(namespaces, strict=False)
         context = XMLSchemaContext(self)
-        return self._xpath_parse(path, namespaces).get_results(context)
+
+        return parser.parse(path).get_results(context)
 
     def iterfind(self, path, namespaces=None):
         """
@@ -287,8 +265,12 @@ def iterfind(self, path, namespaces=None):
         :param namespaces: is an optional mapping from namespace prefix to full name.
         :return: an iterable yielding all matching XSD subelements in document order.
         """
+        path = _REGEX_TAG_POSITION.sub('', path.strip())  # Strips tags positions from path
+        namespaces = self._get_xpath_namespaces(namespaces)
+        parser = XPath2Parser(namespaces, strict=False)
         context = XMLSchemaContext(self)
-        return self._xpath_parse(path, namespaces).select_results(context)
+
+        return parser.parse(path).select_results(context)
 
     def iter(self, tag=None):
         """