Skip to content

Commit

Permalink
WIP tagset config
Browse files Browse the repository at this point in the history
* Loader/Dumper config mixins to create dynamic types and configure them at instantiation with generated partials
* New `FastestBaseLoader`/`FastestBaseDumper` base classes to auto-select the C-backed implementation if available
  • Loading branch information
nitzmahone committed Nov 6, 2023
1 parent 34e27da commit 4190bbf
Show file tree
Hide file tree
Showing 10 changed files with 352 additions and 179 deletions.
33 changes: 21 additions & 12 deletions lib/yaml/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ def emit(events, stream=None, Dumper=Dumper,
if stream is None:
stream = io.StringIO()
getvalue = stream.getvalue
# FIXME: redefine these defaults with sentinels to allow preservation of wrapped config defaults
dumper = Dumper(stream, canonical=canonical, indent=indent, width=width,
allow_unicode=allow_unicode, line_break=line_break)
try:
Expand All @@ -191,6 +192,7 @@ def serialize_all(nodes, stream=None, Dumper=Dumper,
else:
stream = io.BytesIO()
getvalue = stream.getvalue
# FIXME: redefine these defaults with sentinels to allow preservation of wrapped config defaults
dumper = Dumper(stream, canonical=canonical, indent=indent, width=width,
allow_unicode=allow_unicode, line_break=line_break,
encoding=encoding, version=version, tags=tags,
Expand All @@ -213,28 +215,35 @@ def serialize(node, stream=None, Dumper=Dumper, **kwds):
return serialize_all([node], stream, Dumper=Dumper, **kwds)

def dump_all(documents, stream=None, Dumper=Dumper,
default_style=None, default_flow_style=False,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None, sort_keys=True):
default_style=..., default_flow_style=...,
canonical=..., indent=..., width=...,
allow_unicode=..., line_break=...,
encoding=..., explicit_start=..., explicit_end=...,
version=..., tags=..., sort_keys=..., **kwargs):
"""
Serialize a sequence of Python objects into a YAML stream.
If stream is None, return the produced string instead.
"""
getvalue = None
if stream is None:
if encoding is None:
if encoding is None or encoding is ...:
stream = io.StringIO()
else:
stream = io.BytesIO()
getvalue = stream.getvalue
dumper = Dumper(stream, default_style=default_style,
default_flow_style=default_flow_style,
canonical=canonical, indent=indent, width=width,
allow_unicode=allow_unicode, line_break=line_break,
encoding=encoding, version=version, tags=tags,
explicit_start=explicit_start, explicit_end=explicit_end, sort_keys=sort_keys)

# preserve wrapped config defaults for values where we didn't get a default
# FIXME: share this code with the one in config mixin
dumper_init_kwargs = dict(
default_style=default_style,
default_flow_style=default_flow_style,
canonical=canonical, indent=indent, width=width,
allow_unicode=allow_unicode, line_break=line_break,
encoding=encoding, version=version, tags=tags,
explicit_start=explicit_start, explicit_end=explicit_end, sort_keys=sort_keys, **kwargs)

dumper_init_kwargs = {k: v for k, v in dumper_init_kwargs.items() if v is not ...}
dumper = Dumper(stream, **dumper_init_kwargs)
try:
dumper.open()
for data in documents:
Expand Down
75 changes: 75 additions & 0 deletions lib/yaml/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
from __future__ import annotations

import typing as t

from functools import partialmethod
from .tagset import TagSet

T = t.TypeVar('T')


class LoaderConfigMixin:
    """Mixin that adds a ``config`` factory for deriving pre-configured loader types."""

    # FIXME: fix tagset type to use DataClasses, at least externally?
    @classmethod
    def config(cls: type[T], type_name: str | None = None, tagset: TagSet | ... = ..., **kwargs) -> type[T]:
        """
        Create and return a new subclass of ``cls`` with the given configuration.

        :param type_name: name for the generated type; when falsy, a name
            derived from ``cls.__name__`` is used.
        :param tagset: when supplied (anything other than ``...``), the new
            type gets fresh ``yaml_implicit_resolvers`` / ``yaml_constructors``
            dicts, populated through ``init_resolvers(tagset.resolvers)`` and
            ``init_constructors(tagset.constructors)``.
        :param kwargs: extra keyword arguments bound as defaults for the new
            type's ``__init__``. Values left at ``...`` are not bound, so the
            wrapped ``__init__``'s own defaults still apply.
        :return: the newly created subclass.
        """
        if not type_name:
            # FIXME: hash the inputs for a dynamic type name and cache it?
            type_name = f'abcd_from_{cls.__name__}'

        # Extra keyword arguments used to be accepted and silently dropped.
        # Bind them to __init__ the same way DumperConfigMixin.config does;
        # __init__ is only overridden when there is something to bind, so the
        # no-kwargs behaviour is unchanged.
        loader_init_kwargs = {k: v for k, v in kwargs.items() if v is not ...}
        namespace = {}
        if loader_init_kwargs:
            namespace['__init__'] = partialmethod(cls.__init__, **loader_init_kwargs)

        new_type = t.cast(cls, type(type_name, (cls, ), namespace))

        if tagset is not ...:
            # FIXME: provide a base class hook/method for this reset
            # Assign fresh dicts on the new type so the init_* calls below
            # start from empty tables set on new_type itself.
            new_type.yaml_implicit_resolvers = {}
            new_type.init_resolvers(tagset.resolvers)
            new_type.yaml_constructors = {}
            new_type.init_constructors(tagset.constructors)

        return new_type


class DumperConfigMixin:
    """Mixin that adds a ``config`` factory for deriving pre-configured dumper types."""

    @classmethod
    def config(cls: type[T], type_name: str | None = None,
               tagset: TagSet | ... = ...,
               # FIXME: make some of the more obscure style things "nicer" (eg enums?) or just pass through existing values?
               default_style: str | ... = ..., default_flow_style: bool | ... = ...,
               # FIXME: properly type-annotate the rest of these
               canonical=..., indent=..., width=...,
               allow_unicode=..., line_break=...,
               encoding=..., explicit_start=..., explicit_end=...,
               version=..., tags=..., sort_keys=...,
               **kwargs) -> type[T]:
        """
        Create and return a new subclass of ``cls`` with pre-bound ``__init__`` arguments.

        Every option that is given a value other than ``...`` (including any
        extra ``kwargs``) is bound as a keyword argument of the new type's
        ``__init__``. Options left at ``...`` are not bound, so the defaults
        of the wrapped ``__init__`` are preserved.

        :param type_name: name for the generated type; when falsy, a name
            derived from ``cls.__name__`` is used.
        :param tagset: when supplied (anything other than ``...``), the new
            type gets fresh ``yaml_implicit_resolvers`` / ``yaml_representers``
            dicts, populated through ``init_resolvers(tagset.resolvers)`` and
            ``init_representers(tagset.representers)``.
        :return: the newly created subclass.
        """
        # FIXME: hash the inputs for a dynamic type name and cache it?
        resolved_name = type_name or f'abcd_from_{cls.__name__}'

        # preserve wrapped config defaults for values where we didn't get a default
        # FIXME: share this code with the one in __init__.dump_all (and implement on others)
        candidates = {
            'default_style': default_style,
            'default_flow_style': default_flow_style,
            'canonical': canonical,
            'indent': indent,
            'width': width,
            'allow_unicode': allow_unicode,
            'line_break': line_break,
            'encoding': encoding,
            'version': version,
            'tags': tags,
            'explicit_start': explicit_start,
            'explicit_end': explicit_end,
            'sort_keys': sort_keys,
            **kwargs,
        }
        bound_defaults = {}
        for option, value in candidates.items():
            if value is ...:
                # sentinel: caller did not set this option
                continue
            bound_defaults[option] = value

        namespace = {'__init__': partialmethod(cls.__init__, **bound_defaults)}
        new_type = t.cast(cls, type(resolved_name, (cls, ), namespace))

        # FIXME: support all the dynamic dispatch types (multi*, etc)
        if tagset is ...:
            return new_type

        # FIXME: provide a base class hook/method for this reset
        new_type.yaml_implicit_resolvers = {}
        new_type.init_resolvers(tagset.resolvers)
        new_type.yaml_representers = {}
        new_type.init_representers(tagset.representers)
        return new_type
93 changes: 45 additions & 48 deletions lib/yaml/constructor.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from .nodes import *

import collections.abc, datetime, base64, binascii, re, sys, types
import typing as t

class ConstructorError(MarkedYAMLError):
pass
Expand Down Expand Up @@ -267,15 +268,11 @@ def add_multi_constructor(cls, tag_prefix, multi_constructor):


@classmethod
def init_constructors(cls, tagset):
if tagset not in _constructors:
return
for key in _constructors[tagset]:
callback = _constructors[tagset][key]
if (key is None):
cls.add_constructor(key, callback)
else:
cls.add_constructor('tag:yaml.org,2002:' + key, callback)
def init_constructors(cls, tagset: dict[str, t.Callable]):
for type_name, constructor in tagset.items():
# FIXME: encode full tag names in TagSets to avoid this logic and make user-definable types easier
tag_name = f'tag:yaml.org,2002:{type_name}' if type_name else None
cls.add_constructor(tag_name, constructor)


# SafeConstructor implements YAML 1.1
Expand Down Expand Up @@ -499,45 +496,45 @@ def construct_yaml_object(self, node, cls):
data.__dict__.update(state)


_constructors = {
'yaml11': {
'str': BaseConstructor.construct_yaml_str,
'seq': BaseConstructor.construct_yaml_seq,
'map': BaseConstructor.construct_yaml_map,
None: BaseConstructor.construct_undefined,
'int': SafeConstructor.construct_yaml_int,
'float': SafeConstructor.construct_yaml_float,
'null': BaseConstructor.construct_yaml_null,
'bool': BaseConstructor.construct_yaml_bool,
'binary': SafeConstructor.construct_yaml_binary,
'timestamp': SafeConstructor.construct_yaml_timestamp,
'omap': SafeConstructor.construct_yaml_omap,
'pairs': SafeConstructor.construct_yaml_pairs,
'set': SafeConstructor.construct_yaml_set,
},
'core': {
'str': BaseConstructor.construct_yaml_str,
'seq': BaseConstructor.construct_yaml_seq,
'map': BaseConstructor.construct_yaml_map,
None: BaseConstructor.construct_undefined,
'int': BaseConstructor.construct_yaml_int_core,
'float': BaseConstructor.construct_yaml_float_core,
'null': BaseConstructor.construct_yaml_null,
'bool': BaseConstructor.construct_yaml_bool,
},
'json': {
'str': BaseConstructor.construct_yaml_str,
'seq': BaseConstructor.construct_yaml_seq,
'map': BaseConstructor.construct_yaml_map,
None: BaseConstructor.construct_undefined,
'int': BaseConstructor.construct_yaml_int_json,
'float': BaseConstructor.construct_yaml_float_json,
'null': BaseConstructor.construct_yaml_null,
'bool': BaseConstructor.construct_yaml_bool,
},
}

SafeConstructor.init_constructors('yaml11')
# Constructor tables, one per tagset. Each maps a short type name to the
# constructor callable for it; init_constructors() expands a non-empty name to
# the full tag 'tag:yaml.org,2002:<name>' and registers a falsy name (the None
# key) under None.
# NOTE(review): the None entry is presumably the fallback used for tags that
# have no registered constructor -- confirm against add_constructor().

# Table for the 'yaml11' tagset. Besides the entries shared with the tables
# below, it also covers binary, timestamp, omap, pairs and set.
_yaml11_constructors = {
'str': BaseConstructor.construct_yaml_str,
'seq': BaseConstructor.construct_yaml_seq,
'map': BaseConstructor.construct_yaml_map,
None: BaseConstructor.construct_undefined,
'int': SafeConstructor.construct_yaml_int,
'float': SafeConstructor.construct_yaml_float,
'null': BaseConstructor.construct_yaml_null,
'bool': BaseConstructor.construct_yaml_bool,
'binary': SafeConstructor.construct_yaml_binary,
'timestamp': SafeConstructor.construct_yaml_timestamp,
'omap': SafeConstructor.construct_yaml_omap,
'pairs': SafeConstructor.construct_yaml_pairs,
'set': SafeConstructor.construct_yaml_set,
}

# Table for the 'core' tagset: int and float use the *_core constructors.
_core_constructors = {
'str': BaseConstructor.construct_yaml_str,
'seq': BaseConstructor.construct_yaml_seq,
'map': BaseConstructor.construct_yaml_map,
None: BaseConstructor.construct_undefined,
'int': BaseConstructor.construct_yaml_int_core,
'float': BaseConstructor.construct_yaml_float_core,
'null': BaseConstructor.construct_yaml_null,
'bool': BaseConstructor.construct_yaml_bool,
}

# Table for the 'json' tagset: identical to the core table except that int and
# float use the *_json constructors.
_json_constructors = {
'str': BaseConstructor.construct_yaml_str,
'seq': BaseConstructor.construct_yaml_seq,
'map': BaseConstructor.construct_yaml_map,
None: BaseConstructor.construct_undefined,
'int': BaseConstructor.construct_yaml_int_json,
'float': BaseConstructor.construct_yaml_float_json,
'null': BaseConstructor.construct_yaml_null,
'bool': BaseConstructor.construct_yaml_bool,
}

# Register the yaml11 table on SafeConstructor at import time.
SafeConstructor.init_constructors(_yaml11_constructors)

class FullConstructor(SafeConstructor):
# 'extend' is blacklisted because it is used by
Expand Down
7 changes: 4 additions & 3 deletions lib/yaml/cyaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,15 @@

from yaml._yaml import CParser, CEmitter

from .config import LoaderConfigMixin, DumperConfigMixin
from .constructor import *

from .serializer import *
from .representer import *

from .resolver import *

class CBaseLoader(CParser, BaseConstructor, BaseResolver):
class CBaseLoader(CParser, BaseConstructor, BaseResolver, LoaderConfigMixin):

def __init__(self, stream):
CParser.__init__(self, stream)
Expand Down Expand Up @@ -41,7 +42,7 @@ def __init__(self, stream):
UnsafeConstructor.__init__(self)
Resolver.__init__(self)

class CLoader(CParser, Constructor, Resolver):
class CLoader(CParser, Constructor, Resolver, LoaderConfigMixin):

def __init__(self, stream):
CParser.__init__(self, stream)
Expand Down Expand Up @@ -82,7 +83,7 @@ def __init__(self, stream,
default_flow_style=default_flow_style, sort_keys=sort_keys)
Resolver.__init__(self)

class CDumper(CEmitter, Serializer, Representer, Resolver):
class CDumper(CEmitter, Serializer, Representer, Resolver, DumperConfigMixin):

def __init__(self, stream,
default_style=None, default_flow_style=False,
Expand Down
44 changes: 32 additions & 12 deletions lib/yaml/dumper.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,21 @@

__all__ = ['BaseDumper', 'SafeDumper', 'Dumper', 'CommonDumper']
__all__ = ['BaseDumper', 'SafeDumper', 'Dumper', 'CommonDumper', 'FastestBaseDumper']

from . import tagset

from .config import DumperConfigMixin
from .emitter import *
from .serializer import *
from .representer import *
from .resolver import *

class BaseDumper(Emitter, Serializer, BaseRepresenter, BaseResolver):
# Prefer the C-backed dumper when the extension module can be imported; None
# marks "not available" so a pure-Python fallback can be selected afterwards.
try:
    from .cyaml import CDumper as FastestBaseDumper
except ImportError:
    FastestBaseDumper = None


class BaseDumper(Emitter, Serializer, BaseRepresenter, BaseResolver, DumperConfigMixin):

def __init__(self, stream,
default_style=None, default_flow_style=False,
Expand All @@ -24,7 +33,17 @@ def __init__(self, stream,
default_flow_style=default_flow_style, sort_keys=sort_keys)
Resolver.__init__(self)

class SafeDumper(Emitter, Serializer, SafeRepresenter, Resolver):

if not FastestBaseDumper:
# fall back to the pure-Python BaseDumper if the C-backed dumper (cyaml.CDumper) is unavailable
FastestBaseDumper = BaseDumper

# FIXME: reimplement all these as config calls, eg:
# SafeDumper = FastestBaseDumper.config(type_name='SafeDumper', tagset=tagset.yaml11)



class SafeDumper(Emitter, Serializer, SafeRepresenter, Resolver, DumperConfigMixin):

def __init__(self, stream,
default_style=None, default_flow_style=False,
Expand All @@ -42,7 +61,7 @@ def __init__(self, stream,
default_flow_style=default_flow_style, sort_keys=sort_keys)
Resolver.__init__(self)

class CommonDumper(Emitter, Serializer, CommonRepresenter, BaseResolver):
class CommonDumper(Emitter, Serializer, CommonRepresenter, BaseResolver, DumperConfigMixin):

def __init__(self, stream,
default_style=None, default_flow_style=False,
Expand All @@ -61,11 +80,12 @@ def __init__(self, stream,
BaseResolver.__init__(self)

@classmethod
def init_tags(cls, tagset):
cls.init_representers(tagset)
cls.init_resolvers(tagset)
def init_tags(cls, tagset: tagset.TagSet):
cls.init_representers(tagset.representers)
cls.init_resolvers(tagset.resolvers)


class Dumper(Emitter, Serializer, Representer, Resolver):
class Dumper(Emitter, Serializer, Representer, Resolver, DumperConfigMixin):

def __init__(self, stream,
default_style=None, default_flow_style=False,
Expand All @@ -83,8 +103,8 @@ def __init__(self, stream,
default_flow_style=default_flow_style, sort_keys=sort_keys)
Resolver.__init__(self)

class _12_CoreDumper(CommonDumper): pass
_12_CoreDumper.init_tags('core')
class _12_JSONDumper(CommonDumper): pass
_12_JSONDumper.init_tags('json')

_12_CoreDumper = CommonDumper.config(type_name='_12_CoreDumper', tagset=tagset.core)
_12_JSONDumper = CommonDumper.config(type_name='_12_JSONDumper', tagset=tagset.json)


Loading

0 comments on commit 4190bbf

Please sign in to comment.