From d8ee56e72e9206b15d16874c3f8d1aa3f4b8cad9 Mon Sep 17 00:00:00 2001 From: Matt Davis Date: Thu, 9 Nov 2023 22:44:48 -0800 Subject: [PATCH] misc updates, type annotation, auto config validation *DRY internal type annotations for config * PoC top-level config API --- lib/yaml/__init__.py | 55 +++++++++++++++- lib/yaml/config.py | 139 +++++++++++++++++++++++++++------------ lib/yaml/cyaml.py | 18 ++--- lib/yaml/dumper.py | 15 +++-- lib/yaml/loader.py | 15 +++-- tests/lib/test_schema.py | 14 ++-- 6 files changed, 182 insertions(+), 74 deletions(-) diff --git a/lib/yaml/__init__.py b/lib/yaml/__init__.py index 30301c75..fb502fb4 100644 --- a/lib/yaml/__init__.py +++ b/lib/yaml/__init__.py @@ -1,3 +1,6 @@ +import copy +import sys +import typing as t from .error import * @@ -71,7 +74,7 @@ def compose_all(stream, Loader=Loader): finally: loader.dispose() -def load(stream, Loader): +def _old_load(stream, Loader): """ Parse the first YAML document in a stream and produce the corresponding Python object. @@ -254,7 +257,7 @@ def dump_all(documents, stream=None, Dumper=Dumper, if getvalue: return getvalue() -def dump(data, stream=None, Dumper=Dumper, **kwds): +def _old_dump(data, stream=None, Dumper=Dumper, **kwds): """ Serialize a Python object into a YAML stream. If stream is None, return the produced string instead. @@ -407,3 +410,51 @@ def to_yaml(cls, dumper, data): return dumper.represent_yaml_object(cls.yaml_tag, data, cls, flow_style=cls.yaml_flow_style) + +class _YamlConfig: + from yaml.config import _LoaderProtocol, _DumperProtocol + default_loader: _LoaderProtocol = SafeLoader + default_dumper: _DumperProtocol = SafeDumper + + def load(self, stream, loader: _LoaderProtocol | None = None, **kwargs) -> t.Any: + # FIXME: pop/resolve/warn on old Loader kwarg + loader = loader or self.default_loader + return loader.load(stream, Loader=loader, **kwargs) + + def dump(self, data, stream=None, dumper: _DumperProtocol | None = None, **kwargs) -> t.Any: + # FIXME: pop/resolve/warn on old Dumper kwarg + dumper = dumper or self.default_dumper + return dumper.dump(data, stream, dumper=dumper, **kwargs) + + def config_loader(self, loader: _LoaderProtocol, **kwargs) -> t.Self: + return self._overlay_copy(default_loader=loader, **kwargs) + + def config_dumper(self, dumper: _DumperProtocol, **kwargs) -> t.Self: + return self._overlay_copy(default_dumper=dumper, **kwargs) + + def _overlay_copy(self, default_loader: _LoaderProtocol = None, default_dumper=None, **kwargs) -> t.Self: + new_config = _YamlConfig() + + if default_loader: + new_config.default_loader = default_loader.config(**kwargs) + else: + new_config.default_loader = self.default_loader + + if default_dumper: + new_config.default_dumper = default_dumper.config(**kwargs) + else: + new_config.default_dumper = self.default_dumper + + return new_config + + def __getattr__(self, item): + return getattr(sys.modules[__name__], item) + + +_default_config = _YamlConfig() + +config_loader = _default_config.config_loader +config_dumper = _default_config.config_dumper +load = _default_config.load +dump = _default_config.dump +# FIXME: patch all the other top-level methods to use a config as well (and migrate the actual impls elsewhere)? diff --git a/lib/yaml/config.py b/lib/yaml/config.py index df8aeee8..fb8f0949 100644 --- a/lib/yaml/config.py +++ b/lib/yaml/config.py @@ -1,25 +1,62 @@ from __future__ import annotations +import inspect import typing as t -from functools import partialmethod +if t.TYPE_CHECKING and not hasattr(t, 'override'): + from typing_extensions import override + t.override = override + +from functools import lru_cache, partialmethod from .tagset import TagSet -T = t.TypeVar('T') +class _YamlConfigurable(t.Protocol): + _actual_sig: t.ClassVar[t.Callable[..., t.Any] | None] + _stored_config: t.ClassVar[dict[str, t.Any] | None] = {} + + @classmethod + def config(cls, *args, **kwargs) -> t.Any: ... -class LoaderConfigMixin: @classmethod - # FIXME: fix tagset type to use DataClasses, at least externally? - def config(cls: type[T], type_name: str | None = None, tagset: TagSet | ... = ..., **kwargs) -> type[T]: - if not type_name: - # FIXME: hash the inputs for a dynamic type name and cache it? - type_name = f'abcd_from_{cls.__name__}' + def _config_impl(cls, **kwargs) -> t.Any: ... + + def __init_subclass__(cls, **kwargs): + if cls.config.__name__ != '_config_impl': + cls._actual_sig = cls.config + + cls.config = cls._config_impl + + +class _LoaderProtocol(t.Protocol): + @classmethod + def load(cls, stream, loader: _LoaderProtocol | None = None, **kwargs) -> t.Any: + import yaml + return yaml._old_load(stream, Loader=loader or cls) + - new_type = t.cast(cls, type(type_name, (cls, ), {})) +def _type_factory(base_type: type[T], **kwargs) -> type[T]: + return type(f'Customized_{base_type.__name__}', (base_type,), {}) + + +class LoaderConfigMixin(_YamlConfigurable, _LoaderProtocol): + @classmethod + # FIXME: @t.override + #@lru_cache # FIXME: feels wrong, probably an issue + def _config_impl(cls, **kwargs) -> t.Any: + sig = inspect.signature(cls._actual_sig) + ba = sig.bind(**kwargs) + + new_type = _type_factory(cls) + + # FIXME: until all the builtins bootstrap this way, figure out a sane default for origin classes and user classes? + # FIXME: merge existing config; + new_type._stored_config = ba.kwargs # FIXME: add support for arbitrary kwargs passthru ala dumper? + tagset = ba.kwargs.get('tagset', ...) + if tagset is not ...: # FIXME: provide a base class hook/method for this reset new_type.yaml_implicit_resolvers = {} @@ -30,39 +67,40 @@ def config(cls: type[T], type_name: str | None = None, tagset: TagSet | ... = .. return new_type -class DumperConfigMixin: +class CommonLoaderConfig(LoaderConfigMixin): @classmethod - def config(cls: type[T], type_name: str | None = None, - tagset: TagSet | ... = ..., - # FIXME: make some of the more obscure style things "nicer" (eg enums?) or just pass through existing values? - default_style: str | ... = ..., default_flow_style: bool | ... = ..., - # FIXME: properly type-annotate the rest of these - canonical=..., indent=..., width=..., - allow_unicode=..., line_break=..., - encoding=..., explicit_start=..., explicit_end=..., - version=..., tags=..., sort_keys=..., - **kwargs) -> type[T]: - - if not type_name: - # FIXME: hash the inputs for a dynamic type name and cache it? - type_name = f'abcd_from_{cls.__name__}' - - # preserve wrapped config defaults for values where we didn't get a default - # FIXME: share this code with the one in __init__.dump_all (and implement on others) - dumper_init_kwargs = dict( - default_style=default_style, - default_flow_style=default_flow_style, - canonical=canonical, indent=indent, width=width, - allow_unicode=allow_unicode, line_break=line_break, - encoding=encoding, version=version, tags=tags, - explicit_start=explicit_start, explicit_end=explicit_end, sort_keys=sort_keys, **kwargs) - - dumper_init_kwargs = {k: v for k, v in dumper_init_kwargs.items() if v is not ...} - - patched_init = partialmethod(cls.__init__, - **dumper_init_kwargs) - - new_type = t.cast(cls, type(type_name, (cls, ), {'__init__': patched_init})) + def config(cls: _LoaderProtocol, *, tagset: TagSet | ... = ...) -> _LoaderProtocol: ... + + +class _DumperProtocol(t.Protocol): + @classmethod + def dump(cls, data, stream=None, dumper: _DumperProtocol | None = None, **kwargs) -> t.Any: + import yaml + return yaml._old_dump(data, stream, Dumper=dumper or cls, **kwargs) + + +class DumperConfigMixin(_YamlConfigurable, _DumperProtocol): # FIXME: move the args opt-in to the mixin graft sites + @classmethod + # FIXME: @t.override + def _config_impl(cls, **kwargs) -> t.Any: + sig = inspect.signature(cls._actual_sig) + ba = sig.bind(**kwargs) + + # FIXME: merge existing config; + patched_init = partialmethod(cls.__init__, **ba.kwargs) + + new_type = _type_factory(cls) + + # FIXME: pass via dict in type constructor, or ? (dict breaks lru_cache on type_factory) + new_type.__init__ = patched_init + + # FIXME: until all the builtins bootstrap this way, figure out a sane default for origin classes and user classes? + # FIXME: merge existing config; + new_type._stored_config = ba.kwargs + + # FIXME: add support for arbitrary kwargs passthru ala dumper? + + tagset = ba.kwargs.get('tagset', ...) # FIXME: support all the dynamic dispatch types (multi*, etc) if tagset is not ...: @@ -73,3 +111,22 @@ def config(cls: type[T], type_name: str | None = None, new_type.init_representers(tagset.representers) return new_type + + +class CommonDumperConfig(DumperConfigMixin): + @classmethod + def config(cls: _DumperProtocol, *, + tagset: TagSet | ... = ..., + default_style: str | None | ... = ..., + default_flow_style: str | None | ... = ..., + canonical: bool | None | ... = ..., + indent: int | None | ... = ..., + width: int | None | ... = ..., + allow_unicode: bool | None | ... = ..., + line_break: bool | None | ... = ..., + encoding: str | None | ... = ..., + version: str | None | ... = ..., + tags: list[str] | None | ... = ..., + explicit_start: bool | None | ... = ..., + explicit_end: bool | None | ... = ..., + sort_keys: bool | None | ... = ..., ) -> _DumperProtocol: ... diff --git a/lib/yaml/cyaml.py b/lib/yaml/cyaml.py index 9019209e..9753883d 100644 --- a/lib/yaml/cyaml.py +++ b/lib/yaml/cyaml.py @@ -6,7 +6,7 @@ from yaml._yaml import CParser, CEmitter -from .config import LoaderConfigMixin, DumperConfigMixin +from .config import CommonLoaderConfig, CommonDumperConfig from .constructor import * from .serializer import * @@ -14,42 +14,42 @@ from .resolver import * -class CBaseLoader(CParser, BaseConstructor, BaseResolver, LoaderConfigMixin): +class CBaseLoader(CParser, BaseConstructor, BaseResolver, CommonLoaderConfig): def __init__(self, stream): CParser.__init__(self, stream) BaseConstructor.__init__(self) BaseResolver.__init__(self) -class CSafeLoader(CParser, SafeConstructor, Resolver): +class CSafeLoader(CParser, SafeConstructor, Resolver, CommonLoaderConfig): def __init__(self, stream): CParser.__init__(self, stream) SafeConstructor.__init__(self) Resolver.__init__(self) -class CFullLoader(CParser, FullConstructor, Resolver): +class CFullLoader(CParser, FullConstructor, Resolver, CommonLoaderConfig): def __init__(self, stream): CParser.__init__(self, stream) FullConstructor.__init__(self) Resolver.__init__(self) -class CUnsafeLoader(CParser, UnsafeConstructor, Resolver): +class CUnsafeLoader(CParser, UnsafeConstructor, Resolver, CommonLoaderConfig): def __init__(self, stream): CParser.__init__(self, stream) UnsafeConstructor.__init__(self) Resolver.__init__(self) -class CLoader(CParser, Constructor, Resolver, LoaderConfigMixin): +class CLoader(CParser, Constructor, Resolver, CommonLoaderConfig): def __init__(self, stream): CParser.__init__(self, stream) Constructor.__init__(self) Resolver.__init__(self) -class CBaseDumper(CEmitter, BaseRepresenter, BaseResolver): +class CBaseDumper(CEmitter, BaseRepresenter, CommonDumperConfig): def __init__(self, stream, default_style=None, default_flow_style=False, @@ -66,7 +66,7 @@ def __init__(self, stream, default_flow_style=default_flow_style, sort_keys=sort_keys) Resolver.__init__(self) -class CSafeDumper(CEmitter, SafeRepresenter, Resolver): +class CSafeDumper(CEmitter, SafeRepresenter, Resolver, CommonDumperConfig): def __init__(self, stream, default_style=None, default_flow_style=False, @@ -83,7 +83,7 @@ def __init__(self, stream, default_flow_style=default_flow_style, sort_keys=sort_keys) Resolver.__init__(self) -class CDumper(CEmitter, Serializer, Representer, Resolver, DumperConfigMixin): +class CDumper(CEmitter, Serializer, Representer, Resolver, CommonDumperConfig): def __init__(self, stream, default_style=None, default_flow_style=False, diff --git a/lib/yaml/dumper.py b/lib/yaml/dumper.py index 1c5c236e..a2ffdc8f 100644 --- a/lib/yaml/dumper.py +++ b/lib/yaml/dumper.py @@ -14,8 +14,10 @@ except ImportError as ie: FastestBaseDumper = None +from .config import CommonDumperConfig -class BaseDumper(Emitter, Serializer, BaseRepresenter, BaseResolver, DumperConfigMixin): + +class BaseDumper(Emitter, Serializer, BaseRepresenter, BaseResolver, CommonDumperConfig): def __init__(self, stream, default_style=None, default_flow_style=False, @@ -43,7 +45,7 @@ def __init__(self, stream, -class SafeDumper(Emitter, Serializer, SafeRepresenter, Resolver, DumperConfigMixin): +class SafeDumper(Emitter, Serializer, SafeRepresenter, Resolver, CommonDumperConfig): def __init__(self, stream, default_style=None, default_flow_style=False, @@ -61,7 +63,8 @@ def __init__(self, stream, default_flow_style=default_flow_style, sort_keys=sort_keys) Resolver.__init__(self) -class CommonDumper(Emitter, Serializer, CommonRepresenter, BaseResolver, DumperConfigMixin): + +class CommonDumper(Emitter, Serializer, CommonRepresenter, BaseResolver, CommonDumperConfig): def __init__(self, stream, default_style=None, default_flow_style=False, @@ -85,7 +88,7 @@ def init_tags(cls, tagset: tagset.TagSet): cls.init_resolvers(tagset.resolvers) -class Dumper(Emitter, Serializer, Representer, Resolver, DumperConfigMixin): +class Dumper(Emitter, Serializer, Representer, Resolver, CommonDumperConfig): def __init__(self, stream, default_style=None, default_flow_style=False, @@ -104,7 +107,7 @@ def __init__(self, stream, Resolver.__init__(self) -_12_CoreDumper = CommonDumper.config(type_name='_12_CoreDumper', tagset=tagset.core) -_12_JSONDumper = CommonDumper.config(type_name='_12_JSONDumper', tagset=tagset.json) +_12_CoreDumper = CommonDumper.config(tagset=tagset.core) +_12_JSONDumper = CommonDumper.config(tagset=tagset.json) diff --git a/lib/yaml/loader.py b/lib/yaml/loader.py index 61ab4c94..71979f3e 100644 --- a/lib/yaml/loader.py +++ b/lib/yaml/loader.py @@ -17,8 +17,9 @@ except ImportError as ie: FastestBaseLoader = None +from .config import CommonLoaderConfig -class BaseLoader(Reader, Scanner, Parser, Composer, BaseConstructor, BaseResolver, LoaderConfigMixin): +class BaseLoader(Reader, Scanner, Parser, Composer, BaseConstructor, BaseResolver, CommonLoaderConfig): def __init__(self, stream): Reader.__init__(self, stream) Scanner.__init__(self) @@ -44,7 +45,7 @@ def init_tags(cls, tagset: tagset.TagSet): # UnsafeLoader = FastestBaseLoader.config(type_name='UnsafeLoader', tagset=tagset.yaml11 | tagset.python_unsafe) # this pattern will also allow a much easier path for users to bolt on default behavior to any old loader -class FullLoader(Reader, Scanner, Parser, Composer, FullConstructor, Resolver, LoaderConfigMixin): +class FullLoader(Reader, Scanner, Parser, Composer, FullConstructor, Resolver, CommonLoaderConfig): def __init__(self, stream): Reader.__init__(self, stream) @@ -54,7 +55,7 @@ def __init__(self, stream): FullConstructor.__init__(self) Resolver.__init__(self) -class SafeLoader(Reader, Scanner, Parser, Composer, SafeConstructor, Resolver, LoaderConfigMixin): +class SafeLoader(Reader, Scanner, Parser, Composer, SafeConstructor, Resolver, CommonLoaderConfig): def __init__(self, stream): Reader.__init__(self, stream) @@ -64,7 +65,7 @@ def __init__(self, stream): SafeConstructor.__init__(self) Resolver.__init__(self) -class Loader(Reader, Scanner, Parser, Composer, Constructor, Resolver, LoaderConfigMixin): +class Loader(Reader, Scanner, Parser, Composer, Constructor, Resolver, CommonLoaderConfig): def __init__(self, stream): Reader.__init__(self, stream) @@ -78,7 +79,7 @@ def __init__(self, stream): # untrusted input). Use of either Loader or UnsafeLoader should be rare, since # FullLoad should be able to load almost all YAML safely. Loader is left intact # to ensure backwards compatibility. -class UnsafeLoader(Reader, Scanner, Parser, Composer, Constructor, Resolver, LoaderConfigMixin): +class UnsafeLoader(Reader, Scanner, Parser, Composer, Constructor, Resolver, CommonLoaderConfig): def __init__(self, stream): Reader.__init__(self, stream) @@ -89,6 +90,6 @@ def __init__(self, stream): Resolver.__init__(self) -_12_CoreLoader = BaseLoader.config(type_name='_12_CoreLoader', tagset=tagset.core) -_12_JSONLoader = BaseLoader.config(type_name='_12_JSONLoader', tagset=tagset.json) +_12_CoreLoader = BaseLoader.config(tagset=tagset.core) +_12_JSONLoader = BaseLoader.config(tagset=tagset.json) diff --git a/tests/lib/test_schema.py b/tests/lib/test_schema.py index 7bc84b7f..be2bb4d6 100644 --- a/tests/lib/test_schema.py +++ b/tests/lib/test_schema.py @@ -49,16 +49,12 @@ def _fail(input, test): print("Input: >>" + input + "<<") print(test) -class MyCoreLoader(yaml.BaseLoader): pass -class MyJSONLoader(yaml.BaseLoader): pass -class MyCoreDumper(yaml.CommonDumper): pass -class MyJSONDumper(yaml.CommonDumper): pass +from yaml.tagset import core, json -MyCoreLoader.init_tags('core') -MyJSONLoader.init_tags('json') - -MyCoreDumper.init_tags('core') -MyJSONDumper.init_tags('json') +MyCoreLoader = yaml.BaseLoader.config(tagset=core) +MyJSONLoader = yaml.BaseLoader.config(tagset=json) +MyCoreDumper = yaml.CommonDumper.config(tagset=core) +MyJSONDumper = yaml.CommonDumper.config(tagset=json) # The tests/data/yaml11.schema file is copied from # https://github.com/perlpunk/yaml-test-schema/blob/master/data/schema-yaml11.yaml