Skip to content

Commit

Permalink
refactor(list_tables): deprecate schema keyword
Browse files Browse the repository at this point in the history
This is part of a larger refactor where we are removing all usage of the
word `schema` in its hierarchical sense.

Ibis will adhere to the following convention moving forward:
* `schema`: a mapping of column names to datatypes
* `database`: a collection of tables
* `catalog`: a collection of databases

These terms are mapped accordingly onto the specific backend
terminology.

This change to `list_tables` is not a breaking change. `schema` is
deprecated and will warn on usage, but all existing code should remain
functional.

chore(schema): add mixin to help with schema deprecation warnings

refactor(trino): deprecate schema in list_tables

refactor(postgres): deprecate schema in list_tables

refactor(oracle): deprecate schema in list_tables

refactor(bigquery): deprecate schema in list_tables

refactor(mysql): deprecate schema in list_tables

refactor(duckdb): deprecate schema in list_tables

refactor(mssql): deprecate schema in list_tables

refactor(snowflake): deprecate schema in list_tables

docs(list_tables): add backend-specific docstrings for list_tables

fix(postgres): handle temp tables in list_tables
  • Loading branch information
gforsyth committed Mar 25, 2024
1 parent 0aceefc commit ed69960
Show file tree
Hide file tree
Showing 29 changed files with 662 additions and 120 deletions.
4 changes: 4 additions & 0 deletions ci/schema/duckdb.sql
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,7 @@ INSERT INTO map VALUES

CREATE OR REPLACE TABLE topk (x BIGINT);
INSERT INTO topk VALUES (1), (1), (NULL);

CREATE SCHEMA shops;
CREATE TABLE shops.ice_cream (flavor TEXT, quantity INT);
INSERT INTO shops.ice_cream values ('vanilla', 2), ('chocolate', 3);
20 changes: 17 additions & 3 deletions ibis/backends/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -812,7 +812,7 @@ def _filter_with_like(values: Iterable[str], like: str | None = None) -> list[st

@abc.abstractmethod
def list_tables(
self, like: str | None = None, database: str | None = None
self, like: str | None = None, database: tuple[str, str] | str | None = None
) -> list[str]:
"""Return the list of table names in the current database.
Expand All @@ -824,8 +824,22 @@ def list_tables(
like
A pattern in Python's regex format.
database
The database from which to list tables. If not provided, the
current database is used.
The database from which to list tables.
If not provided, the current database is used.
For backends that support multi-level table hierarchies, you can
pass in a dotted string path like `"catalog.database"` or a tuple of
strings like `("catalog", "database")`.
::: {.callout-note}
## Ibis does not use the word `schema` to refer to database hierarchy.
A collection of tables is referred to as a `database`.
A collection of `database` is referred to as a `catalog`.
These terms are mapped onto the corresponding features in each
backend (where available), regardless of whether the backend itself
uses the same terminology.
:::
Returns
-------
Expand Down
49 changes: 24 additions & 25 deletions ibis/backends/bigquery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
project = self._session_dataset.project
dataset = self._session_dataset.dataset_id

if raw_name not in self.list_tables(schema=dataset, database=project):
if raw_name not in self.list_tables(database=(project, dataset)):
table_id = sg.table(
raw_name, db=dataset, catalog=project, quoted=False
).sql(dialect=self.name)
Expand Down Expand Up @@ -863,7 +863,7 @@ def list_schemas(
def list_tables(
self,
like: str | None = None,
database: str | None = None,
database: tuple[str, str] | str | None = None,
schema: str | None = None,
) -> list[str]:
"""List the tables in the database.
Expand All @@ -873,34 +873,33 @@ def list_tables(
like
A pattern to use for listing tables.
database
The database (project) to perform the list against.
schema
The schema (dataset) inside `database` to perform the list against.
The database location to perform the list against.
::: {.callout-warning}
## `schema` refers to database hierarchy
By default uses the current `dataset` (`self.current_database`) and
`project` (`self.current_catalog`).
The `schema` parameter does **not** refer to the column names and
types of `table`.
:::
To specify a table in a separate BigQuery dataset, you can pass in the
project and dataset as a string `"project.dataset"`, or as a tuple of
strings `("project", "dataset")`.
::: {.callout-note}
## Ibis does not use the word `schema` to refer to database hierarchy.
A collection of tables is referred to as a `database`.
A collection of `database` is referred to as a `catalog`.
These terms are mapped onto the corresponding features in each
backend (where available), regardless of whether the backend itself
uses the same terminology.
:::
schema
[deprecated] The schema (dataset) inside `database` to perform the list against.
"""
if database is not None and schema is None:
raise com.com.IbisInputError(
f"{self.name} cannot list tables only using `database` specifier. "
"Include a `schema` argument."
)
elif database is None and schema is not None:
database = sg.parse_one(schema, into=sge.Table, read=self.name)
database.args["quoted"] = False
database = database.sql(dialect=self.name)
else:
database = (
sg.table(schema, db=database, quoted=False).sql(dialect=self.name)
or None
)
table_loc = self._warn_and_create_table_loc(database, schema)

project, dataset = self._parse_project_and_dataset(database)
table_loc = table_loc.sql(dialect=self.name)

project, dataset = self._parse_project_and_dataset(table_loc)
dataset_ref = bq.DatasetReference(project, dataset)
result = [table.table_id for table in self.client.list_tables(dataset_ref)]
return self._filter_with_like(result, like)
Expand Down
20 changes: 18 additions & 2 deletions ibis/backends/bigquery/tests/system/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,8 +230,8 @@ def test_exists_table_different_project(con):
name = "co_daily_summary"
dataset = "bigquery-public-data.epa_historical_air_quality"

assert name in con.list_tables(schema=dataset)
assert "foobar" not in con.list_tables(schema=dataset)
assert name in con.list_tables(database=dataset)
assert "foobar" not in con.list_tables(database=dataset)


def test_multiple_project_queries(con, snapshot):
Expand Down Expand Up @@ -401,3 +401,19 @@ def test_create_table_with_options(con):
assert t.execute().empty
finally:
con.drop_table(name)


def test_list_tables_schema_warning_refactor(con):
    """Check `list_tables` with the deprecated `schema` keyword and with `database`.

    The deprecated `schema` keyword must emit a `FutureWarning` while still
    returning the table list; the `database` keyword must accept both a
    dotted string and a tuple of strings.
    """
    pypi_tables = [
        "external",
        "native",
    ]

    assert con.list_tables()

    # Warn but succeed for schema list.
    # `pytest.warns` (not `pytest.raises`) lets the call complete, so the
    # equality assertion on the returned tables is actually evaluated.
    with pytest.warns(FutureWarning):
        assert con.list_tables(schema="pypi") == pypi_tables

    assert con.list_tables(database="ibis-gbq.pypi") == pypi_tables
    assert con.list_tables(database=("ibis-gbq", "pypi")) == pypi_tables
11 changes: 11 additions & 0 deletions ibis/backends/clickhouse/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,17 @@ def list_databases(self, like: str | None = None) -> list[str]:
def list_tables(
self, like: str | None = None, database: str | None = None
) -> list[str]:
"""List the tables in the database.
Parameters
----------
like
A pattern to use for listing tables.
database
Database to list tables from. Default behavior is to show tables in
the current database.
"""

query = sg.select(C.name).from_(sg.table("tables", db="system"))

if database is None:
Expand Down
15 changes: 14 additions & 1 deletion ibis/backends/datafusion/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,20 @@ def list_tables(
like: str | None = None,
database: str | None = None,
) -> list[str]:
"""List the available tables."""
"""Return the list of table names in the current database.
Parameters
----------
like
A pattern in Python's regex format.
database
Unused in the datafusion backend.
Returns
-------
list[str]
The list of the table names that match the pattern `like`.
"""
return self._filter_with_like(self.con.tables(), like)

def get_schema(
Expand Down
10 changes: 10 additions & 0 deletions ibis/backends/druid/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,16 @@ def drop_table(self, *args, **kwargs):
def list_tables(
self, like: str | None = None, database: str | None = None
) -> list[str]:
"""List the tables in the database.
Parameters
----------
like
A pattern to use for listing tables.
database
Database to list tables from. Default behavior is to show tables in
the current database.
"""
t = sg.table("TABLES", db="INFORMATION_SCHEMA", quoted=True)
c = self.compiler
query = sg.select(sg.column("TABLE_NAME", quoted=True)).from_(t).sql(c.dialect)
Expand Down
46 changes: 34 additions & 12 deletions ibis/backends/duckdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -893,7 +893,7 @@ def read_delta(
def list_tables(
self,
like: str | None = None,
database: str | None = None,
database: tuple[str, str] | str | None = None,
schema: str | None = None,
) -> list[str]:
"""List tables and views.
Expand All @@ -903,10 +903,27 @@ def list_tables(
like
Regex to filter by table/view name.
database
Database name. If not passed, uses the current database. Only
supported with MotherDuck.
Database location. If not passed, uses the current database.
By default uses the current `database` (`self.current_database`) and
`catalog` (`self.current_catalog`).
To specify a table in a separate catalog, you can pass in the
catalog and database as a string `"catalog.database"`, or as a tuple of
strings `("catalog", "database")`.
::: {.callout-note}
## Ibis does not use the word `schema` to refer to database hierarchy.
A collection of tables is referred to as a `database`.
A collection of `database` is referred to as a `catalog`.
These terms are mapped onto the corresponding features in each
backend (where available), regardless of whether the backend itself
uses the same terminology.
:::
schema
Schema name. If not passed, uses the current schema.
[deprecated] Schema name. If not passed, uses the current schema.
Returns
-------
Expand All @@ -923,29 +940,34 @@ def list_tables(
>>> bar = con.create_view("bar", foo)
>>> con.list_tables()
['bar', 'foo']
>>> con.create_schema("my_schema")
>>> con.list_tables(schema="my_schema")
>>> con.create_database("my_database")
>>> con.list_tables(database="my_database")
[]
>>> with con.begin() as c:
... c.exec_driver_sql("CREATE TABLE my_schema.baz (a INTEGER)") # doctest: +ELLIPSIS
... c.exec_driver_sql("CREATE TABLE my_database.baz (a INTEGER)") # doctest: +ELLIPSIS
<...>
>>> con.list_tables(schema="my_schema")
>>> con.list_tables(database="my_database")
['baz']
"""
database = F.current_database() if database is None else sge.convert(database)
schema = F.current_schema() if schema is None else sge.convert(schema)
table_loc = self._warn_and_create_table_loc(database, schema)

catalog = F.current_database()
database = F.current_schema()
if table_loc is not None:
catalog = table_loc.catalog or catalog
database = table_loc.db or database

col = "table_name"
sql = (
sg.select(col)
.from_(sg.table("tables", db="information_schema"))
.distinct()
.where(
C.table_catalog.eq(database).or_(
C.table_catalog.eq(catalog).or_(
C.table_catalog.eq(sge.convert("temp"))
),
C.table_schema.eq(schema),
C.table_schema.eq(database),
)
.sql(self.name, pretty=True)
)
Expand Down
19 changes: 19 additions & 0 deletions ibis/backends/duckdb/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,3 +276,22 @@ def test_invalid_connect(tmp_path):
url = f"duckdb://{tmp_path}?read_only=invalid_value"
with pytest.raises(ValueError):
ibis.connect(url)


def test_list_tables_schema_warning_refactor(con):
    """Check `list_tables` with the deprecated `schema` keyword and with `database`.

    Listing without arguments must include the standard test tables.
    Listing the `shops` location must return exactly `ice_cream`, whether
    it is addressed through the deprecated `schema` keyword (which must
    warn) or through `database` as a string or a one-element tuple.
    """
    default_tables = {
        "astronauts",
        "awards_players",
        "batting",
        "diamonds",
        "functional_alltypes",
        "win",
    }
    assert default_tables.issubset(con.list_tables())

    expected = ["ice_cream"]

    # Deprecated spelling: must warn, but still return the tables.
    with pytest.warns(FutureWarning):
        listed_via_schema = con.list_tables(schema="shops")
        assert listed_via_schema == expected

    # Supported spellings: plain string first, then a tuple of strings.
    for location in ("shops", ("shops",)):
        assert con.list_tables(database=location) == expected
10 changes: 10 additions & 0 deletions ibis/backends/exasol/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,16 @@ def _safe_raw_sql(self, query: str, *args, **kwargs):
yield cur.execute(query, *args, **kwargs)

def list_tables(self, like=None, database=None):
"""List the tables in the database.
Parameters
----------
like
A pattern to use for listing tables.
database
Database to list tables from. Default behavior is to show tables in
the current database.
"""
tables = sg.select("table_name").from_(
sg.table("EXA_ALL_TABLES", catalog="SYS")
)
Expand Down
16 changes: 16 additions & 0 deletions ibis/backends/impala/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,22 @@ def list_databases(self, like=None):
return self._filter_with_like(databases.name.tolist(), like)

def list_tables(self, like=None, database=None):
"""Return the list of table names in the current database.
Parameters
----------
like
A pattern in Python's regex format.
database
The database from which to list tables.
If not provided, the current database is used.
Returns
-------
list[str]
The list of the table names that match the pattern `like`.
"""

statement = "SHOW TABLES"
if database is not None:
statement += f" IN {database}"
Expand Down
Loading

0 comments on commit ed69960

Please sign in to comment.