Skip to content

Commit

Permalink
feat(sqlite): implement _get_schema_using_query in SQLite backend
Browse files Browse the repository at this point in the history
As SQLite does not provide this functionality directly, the code here
creates a temporary view for the query and then uses the `table_info`
pragma to retrieve the schema.

A convoluted, changing name is used for the view in order to avoid
collisions. The lifetime of SQLAlchemy SQLite backend connections is
not documented, so this seems to be the safest thing to do.

The code has been heavily inspired by the DuckDB backend code.
  • Loading branch information
salva authored and cpcloud committed Jan 17, 2023
1 parent 12f6438 commit 7ff84c8
Show file tree
Hide file tree
Showing 9 changed files with 118 additions and 25 deletions.
19 changes: 14 additions & 5 deletions ibis/backends/base/sql/alchemy/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import abc
import atexit
import contextlib
import getpass
import warnings
Expand Down Expand Up @@ -559,7 +560,14 @@ def _get_temp_view_definition(
)

def _register_temp_view_cleanup(self, name: str, raw_name: str) -> None:
pass
query = f"DROP VIEW IF EXISTS {name}"

def drop(self, raw_name: str, query: str):
with self.con.begin() as con:
con.execute(query)
self._temp_views.discard(raw_name)

atexit.register(drop, self, raw_name, query)

def _get_compiled_statement(
self,
Expand All @@ -570,17 +578,18 @@ def _get_compiled_statement(
if compile_kwargs is None:
compile_kwargs = {}
compiled = definition.compile(compile_kwargs=compile_kwargs)
defn = self._get_temp_view_definition(name, definition=compiled)
return defn, compiled.params
lines = self._get_temp_view_definition(name, definition=compiled)
return lines, compiled.params

def _create_temp_view(self, view: sa.Table, definition: sa.sql.Selectable) -> None:
raw_name = view.name
if raw_name not in self._temp_views and raw_name in self.list_tables():
raise ValueError(f"{raw_name} already exists as a table or view")
name = self.con.dialect.identifier_preparer.quote_identifier(view.name)
compiled, params = self._get_compiled_statement(definition, name)
lines, params = self._get_compiled_statement(definition, name)
with self.begin() as con:
con.execute(compiled, **params)
for line in lines:
con.execute(line, **params)
self._temp_views.add(raw_name)
self._register_temp_view_cleanup(name, raw_name)

Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/duckdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,7 @@ def _get_temp_view_definition(
name: str,
definition: sa.sql.compiler.Compiled,
) -> str:
return f"CREATE OR REPLACE TEMPORARY VIEW {name} AS {definition}"
yield f"CREATE OR REPLACE TEMPORARY VIEW {name} AS {definition}"

def _get_compiled_statement(self, view: sa.Table, definition: sa.sql.Selectable):
# TODO: remove this once duckdb supports CTAS prepared statements
Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/mssql/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,4 +73,4 @@ def _get_temp_view_definition(
name: str,
definition: sa.sql.compiler.Compiled,
) -> str:
return f"CREATE OR ALTER VIEW {name} AS {definition}"
yield f"CREATE OR ALTER VIEW {name} AS {definition}"
2 changes: 1 addition & 1 deletion ibis/backends/mysql/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def _get_temp_view_definition(
name: str,
definition: sa.sql.compiler.Compiled,
) -> str:
return f"CREATE OR REPLACE VIEW {name} AS {definition}"
yield f"CREATE OR REPLACE VIEW {name} AS {definition}"


# TODO(kszucs): unsigned integers
Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/postgres/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,4 +198,4 @@ def _get_temp_view_definition(
name: str,
definition: sa.sql.compiler.Compiled,
) -> str:
return f"CREATE OR REPLACE TEMPORARY VIEW {name} AS {definition}"
yield f"CREATE OR REPLACE TEMPORARY VIEW {name} AS {definition}"
56 changes: 50 additions & 6 deletions ibis/backends/sqlite/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,19 @@
import sqlite3
import warnings
from pathlib import Path
from typing import TYPE_CHECKING, Iterable
from typing import TYPE_CHECKING, Iterator

import sqlalchemy as sa
import toolz
from sqlalchemy.dialects.sqlite import DATETIME, TIMESTAMP

import ibis.expr.schema as sch
from ibis import util
from ibis.backends.base import Database
from ibis.backends.base.sql.alchemy import BaseAlchemyBackend, to_sqla_type
from ibis.backends.sqlite import udf
from ibis.backends.sqlite.compiler import SQLiteCompiler
from ibis.backends.sqlite.datatypes import parse
from ibis.expr.schema import datatype

if TYPE_CHECKING:
Expand Down Expand Up @@ -216,9 +220,49 @@ def _table_from_schema(self, name, schema, database: str | None = None) -> sa.Ta
def _current_schema(self) -> str | None:
return self.current_database

def _metadata(self, _: str) -> Iterable[tuple[str, dt.DataType]]:
raise ValueError(
"The SQLite backend cannot infer schemas from raw SQL - "
"please specify the schema directly when calling `.sql` "
"using the `schema` keyword argument"
def _metadata(self, query: str) -> Iterator[tuple[str, dt.DataType]]:
    """Yield ``(column name, ibis type)`` pairs for the result of `query`.

    The query is wrapped in a uniquely named temporary view, which is then
    inspected with ``PRAGMA table_info``. When a column has no declared
    type, the ``typeof`` of its value in the first row is used instead and
    is assumed to be representative of the remaining rows.

    Parameters
    ----------
    query
        A SQLite ``SELECT`` statement.

    Yields
    ------
    tuple[str, dt.DataType]
        The column name and its ibis type, nullable unless ``table_info``
        reports the column as ``notnull``.
    """
    view = f"__ibis_sqlite_metadata{util.guid()}"
    fields = []

    with self.begin() as con:
        # create a view that should only be visible in this transaction
        con.execute(f"CREATE TEMPORARY VIEW {view} AS {query}")

        try:
            # extract table info from the view
            table_info = con.execute(f"PRAGMA table_info({view})")

            # get names, not-null flags and declared types
            names, notnulls, raw_types = zip(
                *toolz.pluck(["name", "notnull", "type"], table_info)
            )

            # quote the column names so that names which are not plain
            # identifiers (spaces, punctuation, expressions) still refer
            # to the view's columns
            type_queries = ", ".join(
                'typeof("{}")'.format(name.replace('"', '""'))
                for name in names
            )

            # get the type of the first row if no affinity was returned in
            # `raw_types`; assume that reflects the rest of the rows
            single_row_types = con.execute(
                f"SELECT {type_queries} FROM {view} LIMIT 1"
            ).fetchone()
            if single_row_types is None:
                # the query produced no rows: there is nothing to sample,
                # so let `parse` apply its "no type specified" rule
                single_row_types = [""] * len(names)

            for name, notnull, raw_typ, typ in zip(
                names, notnulls, raw_types, single_row_types
            ):
                ibis_type = parse(raw_typ or typ)
                fields.append((name, ibis_type(nullable=not notnull)))
        finally:
            # drop the view when we're done with it, even on failure, so
            # an error does not leave the view behind
            con.execute(f"DROP VIEW IF EXISTS {view}")

    yield from fields

def _get_schema_using_query(self, query: str) -> sch.Schema:
    """Build the ibis schema describing the result of a SQLite query string."""
    fields = self._metadata(query)
    return sch.Schema.from_tuples(fields)

def _get_temp_view_definition(
self,
name: str,
definition: sa.sql.compiler.Compiled,
) -> str:
yield f"DROP VIEW IF EXISTS {name}"
yield f"CREATE VIEW {name} AS {definition}"

def _get_compiled_statement(self, view: sa.Table, definition: sa.sql.Selectable):
return super()._get_compiled_statement(
view, definition, compile_kwargs={"literal_binds": True}
)
38 changes: 38 additions & 0 deletions ibis/backends/sqlite/datatypes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""Parse SQLite data types."""

from __future__ import annotations

import ibis.expr.datatypes as dt


def parse(text: str) -> dt.DataType:
    """Map a SQLite declared type name onto an ibis data type.

    The mapping applies the column affinity rules from
    https://www.sqlite.org/datatype3.html in their documented order, using
    case-insensitive substring matches on the stripped type name.
    """
    declared = text.strip().upper()

    def mentions(*fragments: str) -> bool:
        """Return whether any of `fragments` occurs in the declared type."""
        return any(fragment in declared for fragment in fragments)

    # Rule 1: anything containing "INT" has INTEGER affinity.
    if mentions("INT"):
        return dt.int64

    # Rule 2: "CHAR", "CLOB" or "TEXT" give TEXT affinity (so VARCHAR,
    # which contains "CHAR", is text).
    if mentions("CHAR", "CLOB", "TEXT"):
        return dt.string

    # Rule 3: "BLOB", or no declared type at all, gives BLOB affinity.
    if declared == "" or mentions("BLOB"):
        return dt.binary

    # Rule 4: "REAL", "FLOA" or "DOUB" give REAL affinity.
    if mentions("REAL", "FLOA", "DOUB"):
        return dt.float64

    # Rule 5: everything else has NUMERIC affinity.
    return dt.decimal
15 changes: 11 additions & 4 deletions ibis/backends/sqlite/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,22 @@ def test_list_tables(con):
assert len(con.list_tables(like='functional')) == 1


def test_attach_file(dbpath):
client = ibis.sqlite.connect(None)
def test_attach_file(tmp_path):
dbpath = str(tmp_path / "attached.db")
path_client = ibis.sqlite.connect(dbpath)
path_client.create_table("test", schema=ibis.schema(dict(a="int")))

client.attach('foo', Path(dbpath))
client = ibis.sqlite.connect()

assert not client.list_tables()

client.attach('baz', Path(dbpath))
client.attach('bar', dbpath)

foo_tables = client.list_tables(database='foo')
foo_tables = client.list_tables(database='baz')
bar_tables = client.list_tables(database='bar')

assert foo_tables == ["test"]
assert foo_tables == bar_tables


Expand Down
7 changes: 1 addition & 6 deletions ibis/backends/tests/test_dot_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,7 @@
import ibis
from ibis import _, util

REQUIRES_EXPLICIT_SCHEMA = {"sqlite"}
table_dot_sql_notimpl = pytest.mark.notimpl(
["bigquery", "sqlite", "clickhouse", "impala"]
)
table_dot_sql_notimpl = pytest.mark.notimpl(["bigquery", "clickhouse", "impala"])
dot_sql_notimpl = pytest.mark.notimpl(["datafusion"])
dot_sql_notyet = pytest.mark.notyet(
["snowflake"],
Expand All @@ -33,8 +30,6 @@
],
)
def test_con_dot_sql(backend, con, schema):
if schema is None and con.name in REQUIRES_EXPLICIT_SCHEMA:
pytest.xfail(f"{con.name} requires an explicit schema for .sql")
alltypes = con.table("functional_alltypes")
# pull out the quoted name
name = alltypes.op().name
Expand Down

0 comments on commit 7ff84c8

Please sign in to comment.