Skip to content

Commit

Permalink
Fix schema export (issue #387)
Browse files Browse the repository at this point in the history
  - Fix the replacement pattern in export_schema()
  - Add loglevel argument, apply with a decorator
  - Add logger.debug statements
  - Don't remove non-remote residual schemaLocation entries
  • Loading branch information
brunato committed Mar 12, 2024
1 parent c5c37f4 commit b3bd373
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 16 deletions.
10 changes: 10 additions & 0 deletions tests/validators/test_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -843,6 +843,16 @@ def test_export_remote__issue_187(self):

self.assertFalse(os.path.isdir(dirname))

# Test with DEBUG logging level
with tempfile.TemporaryDirectory() as dirname:
with self.assertLogs('xmlschema', level='DEBUG') as ctx:
vh_schema.export(target=dirname, save_remote=True, loglevel='DEBUG')
self.assertGreater(len(ctx.output), 0)
self.assertTrue(any('Write modified XSD' in line for line in ctx.output))
self.assertTrue(any('Write unchanged XSD' in line for line in ctx.output))

self.assertFalse(os.path.isdir(dirname))

@unittest.skipIf(platform.system() == 'Windows', 'skip, Windows systems save with <CR><LF>')
def test_export_other_encoding(self):
schema_file = self.casepath('examples/menù/menù.xsd')
Expand Down
44 changes: 35 additions & 9 deletions xmlschema/exports.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#
import re
import pathlib
import logging
from itertools import chain
from typing import TYPE_CHECKING, Any, Optional, List
from urllib.parse import unquote, urlsplit
Expand All @@ -20,10 +21,15 @@
if TYPE_CHECKING:
from .validators import XMLSchemaBase

logger = logging.getLogger('xmlschema')

# Captures the quoted value of every schemaLocation attribute in an XSD text.
FIND_PATTERN = r'\bschemaLocation\s*=\s*[\'"]([^\'"]*)[\'"]'

# Template for matching one specific schemaLocation value; `{0}` must be
# filled with an already regex-escaped location. Whitespace around the
# value inside the quotes is tolerated.
REPLACE_PATTERN = r'\bschemaLocation\s*=\s*[\'"]\s*{0}\s*[\'"]'


def replace_location(text: str, location: str, repl_location: str) -> str:
    """
    Replaces a schemaLocation attribute value in an XSD source text.

    :param text: the XSD source text to process.
    :param location: the location to be replaced. It is matched literally \
    and as the whole attribute value (optionally surrounded by whitespace), \
    so locations that merely contain it are left untouched.
    :param repl_location: the location to write in place of the matched one.
    :return: the text with every matching attribute rewritten as \
    `schemaLocation="<repl_location>"`.
    """
    repl = 'schemaLocation="{}"'.format(repl_location)
    pattern = REPLACE_PATTERN.format(re.escape(location))

    # Use a callable replacement: a plain string would be scanned by re.sub
    # for backslash escapes and group references, so a replacement location
    # containing a backslash could raise or be silently altered.
    return re.sub(pattern, lambda _match: repl, text)


Expand All @@ -32,6 +38,10 @@ def export_schema(obj: 'XMLSchemaBase', target_dir: str,
remove_residuals: bool = True,
exclude_locations: Optional[List[str]] = None) -> None:

def residuals_filter(x: str) -> Any:
    """
    Tells if a residual schema location has to be cleared.

    A location qualifies only if it is a remote URL, is not one of the
    includes of the schema being processed and has not been explicitly
    excluded by the caller.
    """
    if not is_remote_url(x) or x in schema.includes:
        return False

    # A missing or empty exclusion list excludes nothing. The former
    # `exclude_locations and ...` test was falsy in that case, so with
    # the default arguments no residual location was ever cleared.
    return not exclude_locations or x not in exclude_locations

target_path = pathlib.Path(target_dir)
if target_path.is_dir():
if list(target_path.iterdir()):
Expand All @@ -50,9 +60,13 @@ def export_schema(obj: 'XMLSchemaBase', target_dir: str,
name = obj.name or 'schema.xsd'
exports: Any = {obj: [LocationPath(unquote(name)), obj.get_text(), False]}
path: Any
modified_schemas: Any = set()

if exclude_locations is None:
exclude_locations = []

logger.debug("Start export of schema %r", name)

while True:
current_length = len(exports)

Expand All @@ -61,17 +75,19 @@ def export_schema(obj: 'XMLSchemaBase', target_dir: str,
continue # Skip already processed schemas
exports[schema][2] = True

logger.debug("Process schema instance %r", schema)

dir_path = exports[schema][0].parent
imports_items = [(x.url, x) for x in schema.imports.values()
if x is not None and x.meta_schema is not None]

pattern = r'\bschemaLocation\s*=\s*[\'\"](.*)[\'"]'
schema_locations = set(
x.strip() for x in re.findall(pattern, exports[schema][1])
x.strip() for x in re.findall(FIND_PATTERN, exports[schema][1])
)

for location, ref_schema in chain(schema.includes.items(), imports_items):
if location in exclude_locations:
logger.debug("Location %r is excluded by argument", location)
continue

# Find matching schema location
Expand All @@ -86,6 +102,7 @@ def export_schema(obj: 'XMLSchemaBase', target_dir: str,
location = matching_items[0]
schema_locations.remove(location)
elif not matching_items:
logger.debug("Unmatched location %r, skip ...", location)
continue
else:
for item in matching_items:
Expand All @@ -98,6 +115,8 @@ def export_schema(obj: 'XMLSchemaBase', target_dir: str,
location = matching_items[0]
schema_locations.remove(location)

logger.debug("Matched location %r", location)

if is_remote_url(location):
if not save_remote:
continue
Expand Down Expand Up @@ -150,18 +169,20 @@ def export_schema(obj: 'XMLSchemaBase', target_dir: str,
repl_path = LocationPath(prefix).joinpath(repl_path)

repl = repl_path.as_posix()
logger.debug("Replace location %r with %r", location, repl)

exports[schema][1] = replace_location(exports[schema][1], location, repl)
modified_schemas.add(schema)

if ref_schema not in exports:
exports[ref_schema] = [path, ref_schema.get_text(), False]

if remove_residuals:
# Deactivate residual redundant imports from remote URLs
for location in filter(
lambda x: x not in schema.includes and x not in exclude_locations,
schema_locations
):
if is_remote_url(location):
exports[schema][1] = replace_location(exports[schema][1], location, '')
for location in filter(residuals_filter, schema_locations):
logger.debug("Clear residual remote location %r", location)
exports[schema][1] = replace_location(exports[schema][1], location, '')
modified_schemas.add(schema)

if current_length == len(exports):
break
Expand Down Expand Up @@ -190,5 +211,10 @@ def export_schema(obj: 'XMLSchemaBase', target_dir: str,
if re_match is not None:
encoding = re_match.group(0).lower()

if schema in modified_schemas:
logger.debug("Write modified XSD source to %s", filepath)
else:
logger.debug("Write unchanged XSD source to %s", filepath)

with filepath.open(mode='w', encoding=encoding) as fp:
fp.write(text)
23 changes: 16 additions & 7 deletions xmlschema/validators/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@
EncodeType, BaseXsdType, ExtraValidatorType, ValidationHookType, UriMapperType, \
SchemaGlobalType, FillerType, DepthFillerType, ValueHookType, ElementHookType
from ..translation import gettext as _
from ..helpers import set_logging_level, prune_etree, get_namespace, get_qname, \
is_defuse_error
from ..helpers import set_logging_level, logged, prune_etree, get_namespace, \
get_qname, is_defuse_error
from ..namespaces import NamespaceResourcesMap, NamespaceMapper, NamespaceView
from ..locations import is_local_url, is_remote_url, url_path_is_file, \
normalize_url, normalize_locations
Expand Down Expand Up @@ -1501,19 +1501,28 @@ def add_schema(self, source: SchemaSourceType,
def export(self, target: str,
save_remote: bool = False,
remove_residuals: bool = True,
exclude_locations: Optional[List[str]] = None) -> None:
exclude_locations: Optional[List[str]] = None,
loglevel: Optional[Union[str, int]] = None) -> None:
"""
Exports a schema instance. The schema instance is exported to a
directory with also the hierarchy of imported/included schemas.
:param target: a path to a local empty directory.
:param save_remote: if `True` is provided saves also remote schemas.
:param remove_residuals: for default removes residual schema locations \
from redundant import statements.
:param remove_residuals: for default removes residual remote schema \
locations from redundant import statements.
:param exclude_locations: explicitly exclude schema locations from \
substitution or removal.
"""
export_schema(self, target, save_remote, remove_residuals, exclude_locations)
:param loglevel: for setting a different logging level for schema export.
"""
logged(export_schema)(
obj=self,
target_dir=target,
save_remote=save_remote,
remove_residuals=remove_residuals,
exclude_locations=exclude_locations,
loglevel=loglevel
)

def version_check(self, elem: ElementType) -> bool:
"""
Expand Down

0 comments on commit b3bd373

Please sign in to comment.