Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(sql): enable cross-database joins #9849

Merged
merged 4 commits into from
Aug 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docker/mysql/startup.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CREATE USER 'ibis'@'localhost' IDENTIFIED BY 'ibis';
CREATE SCHEMA IF NOT EXISTS test_schema;
GRANT CREATE,SELECT,DROP ON *.* TO 'ibis'@'%';
GRANT CREATE,SELECT,DROP,INSERT ON *.* TO 'ibis'@'%';
FLUSH PRIVILEGES;
36 changes: 36 additions & 0 deletions ibis/backends/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -1726,3 +1726,39 @@ def test_no_accidental_cross_database_table_load(con_create_database):
# Clean up
con.drop_table(table, database=dbname)
con.drop_database(dbname)


@pytest.mark.notyet(["druid"], reason="can't create tables")
@pytest.mark.notyet(
["flink"], reason="can't create non-temporary tables from in-memory data"
)
def test_cross_database_join(con_create_database, monkeypatch):
con = con_create_database

monkeypatch.setattr(ibis.options, "default_backend", con)

left = ibis.memtable({"a": [1], "b": [2]})
right = ibis.memtable({"a": [1], "c": [3]})

# Create an extra database
con.create_database(dbname := gen_name("dummy_db"))

# Insert left into current_database
left = con.create_table(left_table := gen_name("left"), obj=left)

# Insert right into new database
right = con.create_table(
right_table := gen_name("right"), obj=right, database=dbname
)

expr = left.join(right, "a")
assert expr.columns == ["a", "b", "c"]

result = expr.to_pyarrow()
expected = pa.Table.from_pydict({"a": [1], "b": [2], "c": [3]})

assert result.equals(expected)

con.drop_table(left_table)
con.drop_table(right_table, database=dbname)
con.drop_database(dbname)
16 changes: 12 additions & 4 deletions ibis/backends/trino/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,12 @@ def create_table(
The schema of the table to create; optional, but one of `obj` or
`schema` must be specified
database
Not yet implemented.
The database to insert the table into.
If not provided, the current database is used.
You can provide a single database name, like `"mydb"`. For
multi-level hierarchies, you can pass in a dotted string path like
`"catalog.database"` or a tuple of strings like `("catalog",
"database")`.
temp
This parameter is not yet supported in the Trino backend, because
Trino doesn't implement temporary tables
Expand All @@ -436,13 +441,16 @@ def create_table(
"Temporary tables are not supported in the Trino backend"
)

table_loc = self._to_sqlglot_table(database)
catalog, db = self._to_catalog_db_tuple(table_loc)

quoted = self.compiler.quoted
orig_table_ref = sg.to_identifier(name, quoted=quoted)
orig_table_ref = sg.table(name, catalog=catalog, db=db, quoted=quoted)

if overwrite:
name = util.gen_name(f"{self.name}_overwrite")

table_ref = sg.table(name, catalog=database, quoted=quoted)
table_ref = sg.table(name, catalog=catalog, db=db, quoted=quoted)

if schema is not None and obj is None:
column_defs = [
Expand Down Expand Up @@ -524,7 +532,7 @@ def create_table(
if temp_memtable_view is not None:
self.drop_table(temp_memtable_view)

return self.table(orig_table_ref.name)
return self.table(orig_table_ref.name, database=(catalog, db))

def _fetch_from_cursor(self, cursor, schema: sch.Schema) -> pd.DataFrame:
import pandas as pd
Expand Down