Skip to content

Commit

Permalink
refactor(ir): remove pydantic dependency and make grounds more compos…
Browse files Browse the repository at this point in the history
…able

BREAKING CHANGE: Annotable is mutable by default now
  • Loading branch information
kszucs committed Sep 19, 2022
1 parent ed532bf commit 9da0f41
Show file tree
Hide file tree
Showing 23 changed files with 617 additions and 655 deletions.
15 changes: 14 additions & 1 deletion docs/api/config.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,16 @@
# Configuration Options

::: ibis.config
<!-- prettier-ignore-start -->
::: ibis.config.Options
rendering:
show_bases: false
::: ibis.config.Repr
rendering:
show_bases: false
::: ibis.config.SQL
rendering:
show_bases: false
::: ibis.config.ContextAdjustment
rendering:
show_bases: false
<!-- prettier-ignore-end -->
17 changes: 11 additions & 6 deletions ibis/backends/clickhouse/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
import pandas as pd
import toolz
from clickhouse_driver.client import Client as _DriverClient
from pydantic import Field

import ibis
import ibis.common.validators as rlz
import ibis.config
import ibis.expr.schema as sch
from ibis.backends.base.sql import BaseSQLBackend
Expand All @@ -33,11 +33,16 @@ class Backend(BaseSQLBackend):
table_expr_class = ClickhouseTable
compiler = ClickhouseCompiler

class Options(ibis.config.BaseModel):
temp_db: str = Field(
default="__ibis_tmp",
description="Database to use for temporary objects.",
)
class Options(ibis.config.Config):
"""Clickhouse options.
Attributes
----------
temp_db : str
Database to use for temporary objects.
"""

temp_db = rlz.optional(rlz.str_, default="__ibis_tmp")

def __init__(self, *args, external_tables=None, **kwargs):
super().__init__(*args, **kwargs)
Expand Down
24 changes: 14 additions & 10 deletions ibis/backends/impala/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import fsspec
import numpy as np
import pandas as pd
from pydantic import Field

import ibis.common.exceptions as com
import ibis.config
Expand Down Expand Up @@ -176,15 +175,20 @@ class Backend(BaseSQLBackend):
table_expr_class = ImpalaTable
compiler = ImpalaCompiler

class Options(ibis.config.BaseModel):
temp_db: str = Field(
default="__ibis_tmp",
description="Database to use for temporary objects.",
)
temp_hdfs_path: str = Field(
default="/tmp/hdfs",
description="HDFS path for storage of temporary data",
)
class Options(ibis.config.Config):
"""
Impala specific options.
Parameters
----------
temp_db : str, default "__ibis_tmp"
Database to use for temporary objects.
temp_hdfs_path : str, default "/tmp/hdfs"
HDFS path for storage of temporary data.
"""

temp_db = rlz.optional(rlz.str_, default="__ibis_tmp")
temp_hdfs_path = rlz.optional(rlz.str_, default="/tmp/hdfs")

@staticmethod
def hdfs_connect(
Expand Down
9 changes: 3 additions & 6 deletions ibis/backends/pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from typing import Any, MutableMapping

import pandas as pd
from pydantic import Field

import ibis.common.exceptions as com
import ibis.config
Expand All @@ -18,6 +17,7 @@
PandasTable,
ibis_schema_to_pandas,
)
from ibis.common.validators import instance_of, optional


class BasePandasBackend(BaseBackend):
Expand All @@ -28,11 +28,8 @@ class BasePandasBackend(BaseBackend):
name = "pandas"
backend_table_type = pd.DataFrame

class Options(ibis.config.BaseModel):
enable_trace: bool = Field(
default=False,
description="Enable tracing for execution.",
)
class Options(ibis.config.Config):
enable_trace = optional(instance_of(bool), default=False)

def do_connect(
self,
Expand Down
17 changes: 11 additions & 6 deletions ibis/backends/pyspark/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
import pandas as pd
import pyspark
import sqlalchemy as sa
from pydantic import Field
from pyspark import SparkConf
from pyspark.sql import DataFrame, SparkSession
from pyspark.sql.column import Column

import ibis.common.exceptions as com
import ibis.common.validators as rlz
import ibis.config
import ibis.expr.operations as ops
import ibis.expr.schema as sch
Expand Down Expand Up @@ -106,11 +106,16 @@ class Backend(BaseSQLBackend):
table_class = PySparkDatabaseTable
table_expr_class = PySparkTable

class Options(ibis.config.BaseModel):
treat_nan_as_null: bool = Field(
default=False,
description="Treat NaNs in floating point expressions as NULL.",
)
class Options(ibis.config.Config):
"""PySpark options.
Attributes
----------
treat_nan_as_null : bool
Treat NaNs in floating point expressions as NULL.
"""

treat_nan_as_null = rlz.optional(rlz.bool_, default=False)

def _from_url(self, url: str) -> Backend:
"""Construct a PySpark backend from a URL `url`."""
Expand Down
127 changes: 65 additions & 62 deletions ibis/common/grounds.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import inspect
from abc import ABCMeta, abstractmethod
from typing import Any, Hashable
from typing import Any
from weakref import WeakValueDictionary

from rich.console import Console
Expand Down Expand Up @@ -31,24 +31,13 @@ def __call__(cls, *args, **kwargs):

class Base(metaclass=BaseMeta):

__slots__ = ()
__slots__ = ('__weakref__',)

@classmethod
def __create__(cls, *args, **kwargs):
return type.__call__(cls, *args, **kwargs)


class Immutable(Hashable):

__slots__ = ()

def __setattr__(self, name: str, _: Any) -> None:
raise TypeError(
f"Attribute {name!r} cannot be assigned to immutable instance of "
f"type {type(self)}"
)


class AnnotableMeta(BaseMeta):
"""
Metaclass to turn class annotations into a validatable function signature.
Expand Down Expand Up @@ -117,80 +106,70 @@ def __new__(metacls, clsname, bases, dct):
return super().__new__(metacls, clsname, bases, attribs)


class Annotable(Base, Immutable, metaclass=AnnotableMeta):
class Annotable(Base, metaclass=AnnotableMeta):
"""Base class for objects with custom validation rules."""

__slots__ = ("__args__", "__precomputed_hash__")

@classmethod
def __create__(cls, *args, **kwargs):
# construct the instance by passing the validated keyword arguments
kwargs = cls.__signature__.validate(*args, **kwargs)
return super().__create__(**kwargs)

def __init__(self, **kwargs):
# set the already validated fields using object.__setattr__ since we
# treat the annotable instances as immutable objects
# set the already validated fields using object.__setattr__
for name, value in kwargs.items():
object.__setattr__(self, name, value)
# allow child classes to do some post-initialization
self.__post_init__()

# optimizations to store frequently accessed generic properties
args = tuple(kwargs[name] for name in self.__argnames__)
object.__setattr__(self, "__args__", args)
object.__setattr__(
self, "__precomputed_hash__", hash((self.__class__, args))
)

def __post_init__(self):
# calculate special property-like objects only once due to the
# immutable nature of annotable instances
for name, prop in self.__properties__.items():
object.__setattr__(self, name, prop(self))

# any supplemental custom code provided by descendant classes
self.__post_init__()

def __post_init__(self):
pass
def __setattr__(self, name, value):
param = self.__signature__.parameters[name]
if param.default is not None or value is not None:
value = param.validate(value, this=self.__getstate__())
super().__setattr__(name, value)

def __hash__(self):
return self.__precomputed_hash__
def __repr__(self) -> str:
args = (f"{n}={getattr(self, n)!r}" for n in self.__argnames__)
argstring = ", ".join(args)
return f"{self.__class__.__name__}({argstring})"

def __eq__(self, other):
return super().__eq__(other)
if type(self) is not type(other):
return NotImplemented

def __repr__(self) -> str:
args = ", ".join(
f"{name}={value!r}"
for name, value in zip(self.__argnames__, self.__args__)
return all(
getattr(self, n) == getattr(other, n) for n in self.__argnames__
)
return f"{self.__class__.__name__}({args})"

@classmethod
def _reconstruct(cls, kwargs):
# bypass Annotable.__create__() when deserializing
self = cls.__new__(cls)
self.__init__(**kwargs)
return self
def __getstate__(self):
return {name: getattr(self, name) for name in self.__argnames__}

def __reduce__(self):
kwargs = dict(zip(self.__argnames__, self.__args__))
return (self._reconstruct, (kwargs,))
def __setstate__(self, state):
self.__init__(**state)

# TODO(kszucs): consider making a separate mixin class for this
def copy(self, **overrides):
kwargs = dict(zip(self.__argnames__, self.__args__))
newargs = {**kwargs, **overrides}
return self.__class__(**newargs)
kwargs = self.__getstate__()
kwargs.update(overrides)
return self.__class__(**kwargs)


class Weakrefable(Base):
class Immutable(Base):
__slots__ = ()

__slots__ = ('__weakref__',)
def __setattr__(self, name: str, _: Any) -> None:
raise TypeError(
f"Attribute {name!r} cannot be assigned to immutable instance of "
f"type {type(self)}"
)


class Singleton(Weakrefable):
# NOTE: this only considers the input arguments; when combined with the
# Annotable base class, Singleton must come after it in the MRO
class Singleton(Base):

__slots__ = ()
__instances__ = WeakValueDictionary()
Expand All @@ -206,19 +185,16 @@ def __create__(cls, *args, **kwargs):
return instance


class Comparable(Weakrefable):
class Comparable(Base):

__slots__ = ()
__cache__ = WeakCache()

def __hash__(self):
return super().__hash__()

def __eq__(self, other):
try:
return self.__cached_equals__(other)
except TypeError:
raise NotImplemented # noqa: F901
return NotImplemented # noqa: F901

@abstractmethod
def __equals__(self, other):
Expand All @@ -233,7 +209,7 @@ def __cached_equals__(self, other):
return False

# reduce space required for commutative operation
if hash(self) < hash(other):
if id(self) < id(other):
key = (self, other)
else:
key = (other, self)
Expand All @@ -245,3 +221,30 @@ def __cached_equals__(self, other):
self.__cache__[key] = result

return result


class Concrete(Immutable, Comparable, Annotable):

__slots__ = ("__args__", "__precomputed_hash__")

def __post_init__(self):
# optimizations to store frequently accessed generic properties
arguments = tuple(getattr(self, name) for name in self.__argnames__)
hashvalue = hash((self.__class__, arguments))
object.__setattr__(self, "__args__", arguments)
object.__setattr__(self, "__precomputed_hash__", hashvalue)
super().__post_init__()

def __hash__(self):
return self.__precomputed_hash__

def __equals__(self, other):
return self.__args__ == other.__args__

@property
def args(self):
return self.__args__

@property
def argnames(self):
return self.__argnames__
Loading

0 comments on commit 9da0f41

Please sign in to comment.