Skip to content

Commit

Permalink
refactor(get_schema): remove schema kwarg, add catalog, kw-only
Browse files Browse the repository at this point in the history
This is not a breaking change as `get_schema` was introduced during
the-epic-split refactor and has not yet been released.
  • Loading branch information
gforsyth committed Mar 25, 2024
1 parent c43c0f1 commit 6273e7e
Show file tree
Hide file tree
Showing 20 changed files with 134 additions and 74 deletions.
19 changes: 18 additions & 1 deletion ibis/backends/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -933,7 +933,9 @@ def list_tables(
"""

@abc.abstractmethod
def table(self, name: str, database: str | None = None) -> ir.Table:
def table(
self, name: str, database: tuple[str, str] | str | None = None
) -> ir.Table:
"""Construct a table expression.
Parameters
Expand All @@ -942,6 +944,21 @@ def table(self, name: str, database: str | None = None) -> ir.Table:
Table name
database
Database name
If not provided, the current database is used.
For backends that support multi-level table hierarchies, you can
pass in a dotted string path like `"catalog.database"` or a tuple of
strings like `("catalog", "database")`.
::: {.callout-note}
## Ibis does not use the word `schema` to refer to database hierarchy.
A collection of tables is referred to as a `database`.
A collection of databases is referred to as a `catalog`.
These terms are mapped onto the corresponding features in each
backend (where available), regardless of whether the backend itself
uses the same terminology.
:::
Returns
-------
Expand Down
12 changes: 9 additions & 3 deletions ibis/backends/bigquery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -839,11 +839,17 @@ def _gen_udf_name(self, name: str, schema: Optional[str]) -> str:
return ".".join(f"`{part}`" for part in func.split("."))
return func

def get_schema(self, name, schema: str | None = None, database: str | None = None):
def get_schema(
self,
name,
*,
catalog: str | None = None,
database: str | None = None,
):
table_ref = bq.TableReference(
bq.DatasetReference(
project=database or self.data_project,
dataset_id=schema or self.current_database,
project=catalog or self.data_project,
dataset_id=database or self.current_database,
),
name,
)
Expand Down
14 changes: 9 additions & 5 deletions ibis/backends/clickhouse/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,11 @@ def disconnect(self) -> None:
self.con.close()

def get_schema(
self, table_name: str, database: str | None = None, schema: str | None = None
self,
table_name: str,
*,
catalog: str | None = None,
database: str | None = None,
) -> sch.Schema:
"""Return a Schema object for the indicated table and database.
Expand All @@ -471,20 +475,20 @@ def get_schema(
table_name
May **not** be fully qualified. Use `database` if you want to
qualify the identifier.
catalog
Catalog name, not supported by ClickHouse
database
Database name
schema
Schema name, not supported by ClickHouse
Returns
-------
sch.Schema
Ibis schema
"""
if schema is not None:
if catalog is not None:
raise com.UnsupportedBackendFeatureError(
"`schema` namespaces are not supported by clickhouse"
"`catalog` namespaces are not supported by clickhouse"
)
query = sge.Describe(this=sg.table(table_name, db=database))
with self._safe_raw_sql(query) as results:
Expand Down
15 changes: 9 additions & 6 deletions ibis/backends/datafusion/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,17 +274,20 @@ def list_tables(
"""
return self._filter_with_like(self.con.tables(), like)

# XXX(Gil): refactor schema out of this
def get_schema(
self, table_name: str, schema: str | None = None, database: str | None = None
self,
table_name: str,
*,
catalog: str | None = None,
database: str | None = None,
) -> sch.Schema:
if database is not None:
catalog = self.con.catalog(database)
if catalog is not None:
catalog = self.con.catalog(catalog)
else:
catalog = self.con.catalog()

if schema is not None:
database = catalog.database(schema)
if database is not None:
database = catalog.database(database)
else:
database = catalog.database()

Expand Down
8 changes: 6 additions & 2 deletions ibis/backends/druid/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,11 +115,15 @@ def _get_schema_using_query(self, query: str) -> sch.Schema:
return sch.Schema(schema)

def get_schema(
self, table_name: str, schema: str | None = None, database: str | None = None
self,
table_name: str,
*,
catalog: str | None = None,
database: str | None = None,
) -> sch.Schema:
return self._get_schema_using_query(
sg.select(STAR)
.from_(sg.table(table_name, db=schema, catalog=database))
.from_(sg.table(table_name, db=database, catalog=catalog))
.sql(self.dialect)
)

Expand Down
20 changes: 12 additions & 8 deletions ibis/backends/duckdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ def table(
Table expression
"""
table_schema = self.get_schema(name, schema=schema, database=database)
table_schema = self.get_schema(name, catalog=schema, database=database)
# load geospatial only if geo columns
if any(typ.is_geospatial() for typ in table_schema.types):
self.load_extension("spatial")
Expand All @@ -286,7 +286,11 @@ def table(
).to_expr()

def get_schema(
self, table_name: str, schema: str | None = None, database: str | None = None
self,
table_name: str,
*,
catalog: str | None = None,
database: str | None = None,
) -> sch.Schema:
"""Compute the schema of a `table`.
Expand All @@ -295,8 +299,8 @@ def get_schema(
table_name
May **not** be fully qualified. Use `database` if you want to
qualify the identifier.
schema
Schema name
catalog
Catalog name
database
Database name
Expand All @@ -308,11 +312,11 @@ def get_schema(
"""
conditions = [sg.column("table_name").eq(sge.convert(table_name))]

if database is not None:
conditions.append(sg.column("table_catalog").eq(sge.convert(database)))
if catalog is not None:
conditions.append(sg.column("table_catalog").eq(sge.convert(catalog)))

if schema is not None:
conditions.append(sg.column("table_schema").eq(sge.convert(schema)))
if database is not None:
conditions.append(sg.column("table_schema").eq(sge.convert(database)))

query = (
sg.select(
Expand Down
11 changes: 9 additions & 2 deletions ibis/backends/exasol/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,13 +167,20 @@ def list_tables(self, like=None, database=None):
return self._filter_with_like([table for (table,) in tables], like=like)

def get_schema(
self, table_name: str, schema: str | None = None, database: str | None = None
self,
table_name: str,
*,
catalog: str | None = None,
database: str | None = None,
) -> sch.Schema:
return self._get_schema_using_query(
sg.select(STAR)
.from_(
sg.table(
table_name, db=schema, catalog=database, quoted=self.compiler.quoted
table_name,
db=database,
catalog=catalog,
quoted=self.compiler.quoted,
)
)
.sql(self.dialect)
Expand Down
7 changes: 4 additions & 3 deletions ibis/backends/flink/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,19 +261,20 @@ def table(
def get_schema(
self,
table_name: str,
database: str | None = None,
*,
catalog: str | None = None,
database: str | None = None,
) -> sch.Schema:
"""Return a Schema object for the indicated table and database.
Parameters
----------
table_name : str
Table name.
database : str, optional
Database name.
catalog : str, optional
Catalog name.
database : str, optional
Database name.
Returns
-------
Expand Down
12 changes: 8 additions & 4 deletions ibis/backends/impala/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,16 +353,20 @@ def drop_database(self, name, force=False):
self._safe_exec_sql(statement)

def get_schema(
self, table_name: str, schema: str | None = None, database: str | None = None
self,
table_name: str,
*,
catalog: str | None = None,
database: str | None = None,
) -> sch.Schema:
"""Return a Schema object for the indicated table and database.
Parameters
----------
table_name
Table name
schema
Schema name. Unused in the impala backend.
catalog
Catalog name. Unused in the impala backend.
database
Database name
Expand All @@ -374,7 +378,7 @@ def get_schema(
"""
query = sge.Describe(
this=sg.table(
table_name, db=schema, catalog=database, quoted=self.compiler.quoted
table_name, db=database, catalog=catalog, quoted=self.compiler.quoted
)
)

Expand Down
10 changes: 5 additions & 5 deletions ibis/backends/mssql/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,12 +132,12 @@ def do_connect(
self.con = con

def get_schema(
self, name: str, schema: str | None = None, database: str | None = None
self, name: str, *, catalog: str | None = None, database: str | None = None
) -> sch.Schema:
conditions = [sg.column("table_name").eq(sge.convert(name))]

if schema is not None:
conditions.append(sg.column("table_schema").eq(sge.convert(schema)))
if database is not None:
conditions.append(sg.column("table_schema").eq(sge.convert(database)))

query = (
sg.select(
Expand All @@ -152,7 +152,7 @@ def get_schema(
sg.table(
"columns",
db="information_schema",
catalog=database or self.current_catalog,
catalog=catalog or self.current_catalog,
)
)
.where(*conditions)
Expand All @@ -163,7 +163,7 @@ def get_schema(
meta = cur.fetchall()

if not meta:
fqn = sg.table(name, db=schema, catalog=database).sql(self.dialect)
fqn = sg.table(name, db=database, catalog=catalog).sql(self.dialect)
raise com.IbisError(f"Table not found: {fqn}")

mapping = {}
Expand Down
4 changes: 2 additions & 2 deletions ibis/backends/mysql/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,9 +197,9 @@ def _get_schema_using_query(self, query: str) -> sch.Schema:
cur.execute(f"DROP TABLE {table}")

def get_schema(
self, name: str, schema: str | None = None, database: str | None = None
self, name: str, *, catalog: str | None = None, database: str | None = None
) -> sch.Schema:
table = sg.table(name, db=schema, catalog=database, quoted=True).sql(self.name)
table = sg.table(name, db=database, catalog=catalog, quoted=True).sql(self.name)

with self.begin() as cur:
cur.execute(f"DESCRIBE {table}")
Expand Down
8 changes: 4 additions & 4 deletions ibis/backends/oracle/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,10 +293,10 @@ def list_databases(
return self._filter_with_like(schemata, like)

def get_schema(
self, name: str, schema: str | None = None, database: str | None = None
self, name: str, *, catalog: str | None = None, database: str | None = None
) -> sch.Schema:
if schema is None:
schema = self.con.username.upper()
if database is None:
database = self.con.username.upper()
stmt = (
sg.select(
C.column_name,
Expand All @@ -308,7 +308,7 @@ def get_schema(
.from_(sg.table("all_tab_columns"))
.where(
C.table_name.eq(sge.convert(name)),
C.owner.eq(sge.convert(schema)),
C.owner.eq(sge.convert(database)),
)
.order_by(C.column_id)
)
Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ def table(self, name: str, schema: sch.Schema | None = None):
schema = PandasData.infer_table(df, schema=schema)
return ops.DatabaseTable(name, schema, self).to_expr()

def get_schema(self, table_name, database=None):
def get_schema(self, table_name, *, database=None):
schemas = self.schemas
try:
schema = schemas[table_name]
Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/polars/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@ def create_table(
self._add_table(name, obj)
return self.table(name)

def get_schema(self, table_name, database=None):
def get_schema(self, table_name):
return self._tables[table_name].schema

@classmethod
Expand Down
6 changes: 5 additions & 1 deletion ibis/backends/postgres/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,11 @@ def _register_udfs(self, expr: ir.Expr) -> None:
pass

def get_schema(
self, name: str, schema: str | None = None, database: str | None = None
self,
name: str,
*,
catalog: str | None = None,
database: str | None = None,
):
a = ColGen(table="a")
c = ColGen(table="c")
Expand Down
Loading

0 comments on commit 6273e7e

Please sign in to comment.