Skip to content

Commit

Permalink
feat(api): add ibis.memtable API for constructing in-memory table e…
Browse files Browse the repository at this point in the history
…xpressions
  • Loading branch information
cpcloud committed Aug 25, 2022
1 parent 9796d4a commit 0cc6948
Show file tree
Hide file tree
Showing 16 changed files with 518 additions and 193 deletions.
12 changes: 9 additions & 3 deletions ibis/backends/base/sql/alchemy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import ibis
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
import ibis.expr.schema as sch
import ibis.expr.types as ir
import ibis.util as util
Expand Down Expand Up @@ -212,9 +213,14 @@ def create_table(
with self.begin() as bind:
t.create(bind=bind, checkfirst=force)
if expr is not None:
bind.execute(
t.insert().from_select(list(expr.columns), expr.compile())
)
compiled = self.compile(expr)
insert = t.insert()
if isinstance(expr.op(), ops.InMemoryTable):
compiled = compiled.get_final_froms()[0]
sa_expr = insert.values(*compiled._data)
else:
sa_expr = insert.from_select(list(expr.columns), compiled)
bind.execute(sa_expr)

def _columns_from_schema(
self, name: str, schema: sch.Schema
Expand Down
6 changes: 5 additions & 1 deletion ibis/backends/base/sql/alchemy/query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,10 @@ def _format_table(self, expr):
)
backend = ref_op.child._find_backend()
backend._create_temp_view(view=result, definition=definition)
elif isinstance(ref_op, ops.InMemoryTable):
columns = _schema_to_sqlalchemy_columns(ref_op.schema)
rows = list(ref_op.data.itertuples(index=False))
result = sa.values(*columns).data(rows)
else:
# A subquery
if ctx.is_extracted(ref_expr):
Expand Down Expand Up @@ -194,7 +198,7 @@ def _compile_subqueries(self):

def _compile_table_set(self):
if self.table_set is not None:
helper = _AlchemyTableSetFormatter(self, self.table_set)
helper = self.table_set_formatter_class(self, self.table_set)
result = helper.get_result()
if isinstance(result, sql.selectable.Select):
return result.subquery()
Expand Down
7 changes: 7 additions & 0 deletions ibis/backends/base/sql/compiler/query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,13 @@ def _format_table(self, expr):
raise com.RelationError(f'Table did not have a name: {expr!r}')
result = self._quote_identifier(name)
is_subquery = False
elif isinstance(ref_op, ops.InMemoryTable):
rows = ", ".join(
f"({', '.join(map(repr, col))})"
for col in ref_op.data.itertuples(index=False)
)
result = f"(VALUES {rows})"
is_subquery = True
else:
# A subquery
if ctx.is_extracted(ref_expr):
Expand Down
5 changes: 5 additions & 0 deletions ibis/backends/base/sql/compiler/select_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -513,6 +513,11 @@ def _collect_Selection(self, expr, toplevel=False):
self.table_set = table
self.filters = filters

def _collect_PandasInMemoryTable(self, expr, toplevel=False):
if toplevel:
self.select_set = [expr]
self.table_set = expr

def _convert_group_by(self, exprs):
return list(range(len(exprs)))

Expand Down
2 changes: 2 additions & 0 deletions ibis/backends/duckdb/compiler.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

from sqlalchemy.ext.compiler import compiles

import ibis.backends.base.sql.alchemy.datatypes as sat
Expand Down
25 changes: 25 additions & 0 deletions ibis/backends/pandas/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,11 @@

import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
import ibis.expr.rules as rlz
import ibis.expr.schema as sch
from ibis import util
from ibis.backends.base import Database
from ibis.common.grounds import Immutable

infer_pandas_dtype = pd.api.types.infer_dtype

Expand Down Expand Up @@ -298,6 +301,28 @@ def convert_array_to_series(in_dtype, out_dtype, column):
sch.Schema.to_pandas = ibis_schema_to_pandas # type: ignore


class DataFrameProxy(Immutable):
__slots__ = ('_df', '_hash')

def __init__(self, df):
object.__setattr__(self, "_df", df)
object.__setattr__(self, "_hash", hash((type(df), id(df))))

def __getattr__(self, name):
return getattr(self._df, name)

def __hash__(self):
return self._hash

def __repr__(self):
df_repr = util.indent(repr(self._df), spaces=2)
return f"{self.__class__.__name__}:\n{df_repr}"


class PandasInMemoryTable(ops.InMemoryTable):
data = rlz.instance_of(DataFrameProxy)


class PandasTable(ops.DatabaseTable):
pass

Expand Down
5 changes: 4 additions & 1 deletion ibis/backends/pyspark/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,10 @@ def compile(self, expr, timecontext=None, params=None, *args, **kwargs):
timecontext,
)
return PySparkExprTranslator().translate(
expr, scope=scope, timecontext=timecontext
expr,
scope=scope,
timecontext=timecontext,
session=self._session,
)

def execute(
Expand Down
Loading

0 comments on commit 0cc6948

Please sign in to comment.