Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: deprecate register api #8863

Merged
merged 2 commits into from
May 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion ibis/backends/datafusion/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from ibis.backends.sql.compiler import C
from ibis.expr.operations.udf import InputType
from ibis.formats.pyarrow import PyArrowType
from ibis.util import gen_name, normalize_filename
from ibis.util import deprecated, gen_name, normalize_filename

try:
from datafusion import ExecutionContext as SessionContext
Expand Down Expand Up @@ -294,6 +294,10 @@ def get_schema(
table = database.table(table_name)
return sch.schema(table.schema)

@deprecated(
as_of="9.1",
instead="use the explicit `read_*` method for the filetype you are trying to read, e.g., read_parquet, read_csv, etc.",
)
def register(
self,
source: str | Path | pa.Table | pa.RecordBatch | pa.Dataset | pd.DataFrame,
Expand Down
11 changes: 7 additions & 4 deletions ibis/backends/datafusion/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,13 @@ def _load_data(self, **_: Any) -> None:
con = self.connection
for table_name in TEST_TABLES:
path = self.data_dir / "parquet" / f"{table_name}.parquet"
con.register(path, table_name=table_name)
con.register(array_types, table_name="array_types")
con.register(win, table_name="win")
con.register(topk, table_name="topk")
with pytest.warns(FutureWarning, match="v9.1"):
con.register(path, table_name=table_name)
# TODO: remove warnings and replace register when implementing 8858
with pytest.warns(FutureWarning, match="v9.1"):
con.register(array_types, table_name="array_types")
con.register(win, table_name="win")
con.register(topk, table_name="topk")

@staticmethod
def connect(*, tmpdir, worker_id, **kw):
Expand Down
8 changes: 6 additions & 2 deletions ibis/backends/datafusion/tests/test_connect.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,17 @@ def test_none_config():

def test_str_config(name_to_path):
config = {name: str(path) for name, path in name_to_path.items()}
conn = ibis.datafusion.connect(config)
# if path.endswith((".parquet", ".csv", ".csv.gz")) connect triggers register
with pytest.warns(FutureWarning, match="v9.1"):
conn = ibis.datafusion.connect(config)
assert sorted(conn.list_tables()) == sorted(name_to_path)


def test_path_config(name_to_path):
config = name_to_path
conn = ibis.datafusion.connect(config)
# if path.endswith((".parquet", ".csv", ".csv.gz")) connect triggers register
with pytest.warns(FutureWarning, match="v9.1"):
conn = ibis.datafusion.connect(config)
assert sorted(conn.list_tables()) == sorted(name_to_path)


Expand Down
18 changes: 11 additions & 7 deletions ibis/backends/datafusion/tests/test_register.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,24 +25,28 @@ def test_read_parquet(conn, data_dir):

def test_register_table(conn):
tab = pa.table({"x": [1, 2, 3]})
conn.register(tab, "my_table")
with pytest.warns(FutureWarning, match="v9.1"):
conn.register(tab, "my_table")
assert conn.table("my_table").x.sum().execute() == 6


def test_register_pandas(conn):
df = pd.DataFrame({"x": [1, 2, 3]})
conn.register(df, "my_table")
assert conn.table("my_table").x.sum().execute() == 6
with pytest.warns(FutureWarning, match="v9.1"):
conn.register(df, "my_table")
assert conn.table("my_table").x.sum().execute() == 6


def test_register_batches(conn):
batch = pa.record_batch([pa.array([1, 2, 3])], names=["x"])
conn.register(batch, "my_table")
assert conn.table("my_table").x.sum().execute() == 6
with pytest.warns(FutureWarning, match="v9.1"):
conn.register(batch, "my_table")
assert conn.table("my_table").x.sum().execute() == 6


def test_register_dataset(conn):
tab = pa.table({"x": [1, 2, 3]})
dataset = ds.InMemoryDataset(tab)
conn.register(dataset, "my_table")
assert conn.table("my_table").x.sum().execute() == 6
with pytest.warns(FutureWarning, match="v9.1"):
conn.register(dataset, "my_table")
assert conn.table("my_table").x.sum().execute() == 6
5 changes: 5 additions & 0 deletions ibis/backends/duckdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from ibis.backends.sql import SQLBackend
from ibis.backends.sql.compiler import STAR, C
from ibis.expr.operations.udf import InputType
from ibis.util import deprecated

if TYPE_CHECKING:
from collections.abc import Iterable, Mapping, MutableMapping, Sequence
Expand Down Expand Up @@ -534,6 +535,10 @@ def drop_database(
with self._safe_raw_sql(sge.Drop(this=name, kind="SCHEMA", replace=force)):
pass

@deprecated(
as_of="9.1",
instead="use the explicit `read_*` method for the filetype you are trying to read, e.g., read_parquet, read_csv, etc.",
)
def register(
self,
source: str | Path | Any,
Expand Down
4 changes: 3 additions & 1 deletion ibis/backends/duckdb/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,9 @@ def test_connect_duckdb(url, tmp_path):
)
def test_connect_local_file(out_method, extension, test_employee_data_1, tmp_path):
getattr(test_employee_data_1, out_method)(tmp_path / f"out.{extension}")
con = ibis.connect(tmp_path / f"out.{extension}")
with pytest.warns(FutureWarning, match="v9.1"):
# ibis.connect uses con.register
con = ibis.connect(tmp_path / f"out.{extension}")
t = next(iter(con.tables.values()))
assert not t.head().execute().empty

Expand Down
4 changes: 2 additions & 2 deletions ibis/backends/duckdb/tests/test_register.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,10 +257,10 @@ def test_read_sqlite_no_table_name(con, tmp_path):
)
def test_register_sqlite(con, tmp_path):
path = tmp_path / "test.db"

sqlite_con = sqlite3.connect(str(path))
sqlite_con.execute("CREATE TABLE t AS SELECT 1 a UNION SELECT 2 UNION SELECT 3")
ft = con.register(f"sqlite://{path}", "t")
with pytest.warns(FutureWarning, match="v9.1"):
ft = con.register(f"sqlite://{path}", "t")
assert ft.count().execute()


Expand Down
6 changes: 5 additions & 1 deletion ibis/backends/polars/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from ibis.backends.sql.dialects import Polars
from ibis.expr.rewrites import lower_stringslice
from ibis.formats.polars import PolarsSchema
from ibis.util import gen_name, normalize_filename, normalize_filenames
from ibis.util import deprecated, gen_name, normalize_filename, normalize_filenames

if TYPE_CHECKING:
from collections.abc import Iterable
Expand Down Expand Up @@ -74,6 +74,10 @@ def table(self, name: str, _schema: sch.Schema | None = None) -> ir.Table:
schema = PolarsSchema.to_ibis(self._tables[name].schema)
return ops.DatabaseTable(name, schema, self).to_expr()

@deprecated(
as_of="9.1",
instead="use the explicit `read_*` method for the filetype you are trying to read, e.g., read_parquet, read_csv, etc.",
)
def register(
self,
source: str | Path | Any,
Expand Down
11 changes: 7 additions & 4 deletions ibis/backends/polars/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,13 @@ def _load_data(self, **_: Any) -> None:
con = self.connection
for table_name in TEST_TABLES:
path = self.data_dir / "parquet" / f"{table_name}.parquet"
con.register(path, table_name=table_name)
con.register(array_types, table_name="array_types")
con.register(struct_types, table_name="struct")
con.register(win, table_name="win")
with pytest.warns(FutureWarning, match="v9.1"):
con.register(path, table_name=table_name)
# TODO: remove warnings and replace register when implementing 8858
with pytest.warns(FutureWarning, match="v9.1"):
con.register(array_types, table_name="array_types")
con.register(struct_types, table_name="struct")
con.register(win, table_name="win")

# TODO: remove when pyarrow inputs are supported
con._add_table("topk", pl.from_arrow(topk).lazy())
Expand Down
5 changes: 5 additions & 0 deletions ibis/backends/pyspark/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from ibis.backends.sql import SQLBackend
from ibis.expr.operations.udf import InputType
from ibis.legacy.udf.vectorized import _coerce_to_series
from ibis.util import deprecated

Check warning on line 30 in ibis/backends/pyspark/__init__.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/pyspark/__init__.py#L30

Added line #L30 was not covered by tests

if TYPE_CHECKING:
from collections.abc import Mapping, Sequence
Expand Down Expand Up @@ -739,6 +740,10 @@
spark_df.createOrReplaceTempView(table_name)
return self.table(table_name)

@deprecated(

Check warning on line 743 in ibis/backends/pyspark/__init__.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/pyspark/__init__.py#L743

Added line #L743 was not covered by tests
as_of="9.1",
instead="use the explicit `read_*` method for the filetype you are trying to read, e.g., read_parquet, read_csv, etc.",
)
def register(
self,
source: str | Path | Any,
Expand Down
47 changes: 33 additions & 14 deletions ibis/backends/tests/test_register.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def gzip_csv(data_dir, tmp_path):
return str(f.absolute())


# TODO: rewrite or delete test when register api is removed
@pytest.mark.parametrize(
("fname", "in_table_name", "out_table_name"),
[
Expand Down Expand Up @@ -99,13 +100,15 @@ def gzip_csv(data_dir, tmp_path):
)
def test_register_csv(con, data_dir, fname, in_table_name, out_table_name):
with pushd(data_dir / "csv"):
table = con.register(fname, table_name=in_table_name)
with pytest.warns(FutureWarning, match="v9.1"):
table = con.register(fname, table_name=in_table_name)

assert any(out_table_name in t for t in con.list_tables())
if con.name != "datafusion":
table.count().execute()


# TODO: rewrite or delete test when register api is removed
@pytest.mark.notimpl(["datafusion"])
@pytest.mark.notyet(
[
Expand All @@ -126,11 +129,13 @@ def test_register_csv(con, data_dir, fname, in_table_name, out_table_name):
)
def test_register_csv_gz(con, data_dir, gzip_csv):
with pushd(data_dir):
table = con.register(gzip_csv)
with pytest.warns(FutureWarning, match="v9.1"):
table = con.register(gzip_csv)

assert table.count().execute()


# TODO: rewrite or delete test when register api is removed
@pytest.mark.notyet(
[
"bigquery",
Expand All @@ -154,7 +159,8 @@ def test_register_with_dotted_name(con, data_dir, tmp_path):
f.parent.mkdir()
data = data_dir.joinpath("csv", "diamonds.csv").read_bytes()
f.write_bytes(data)
table = con.register(str(f.absolute()))
with pytest.warns(FutureWarning, match="v9.1"):
table = con.register(str(f.absolute()))

if con.name != "datafusion":
table.count().execute()
Expand All @@ -173,6 +179,7 @@ def read_table(path: Path) -> Iterator[tuple[str, pa.Table]]:
return pac.read_csv(data_dir / f"{table_name}.csv", convert_options=convert_options)


# TODO: rewrite or delete test when register api is removed
@pytest.mark.parametrize(
("fname", "in_table_name", "out_table_name"),
[
Expand Down Expand Up @@ -216,14 +223,16 @@ def test_register_parquet(
pq.write_table(table, tmp_path / fname.name)

with pushd(tmp_path):
table = con.register(f"parquet://{fname.name}", table_name=in_table_name)
with pytest.warns(FutureWarning, match="v9.1"):
table = con.register(f"parquet://{fname.name}", table_name=in_table_name)

assert any(out_table_name in t for t in con.list_tables())
assert any(out_table_name in t for t in con.list_tables())

if con.name != "datafusion":
table.count().execute()


# TODO: rewrite or delete test when register api is removed
@pytest.mark.notyet(
[
"bigquery",
Expand Down Expand Up @@ -255,15 +264,20 @@ def test_register_iterator_parquet(
pq.write_table(table, tmp_path / "functional_alltypes.parquet")

with pushd(tmp_path):
table = con.register(
["parquet://functional_alltypes.parquet", "functional_alltypes.parquet"],
table_name=None,
)
with pytest.warns(FutureWarning, match="v9.1"):
table = con.register(
[
"parquet://functional_alltypes.parquet",
"functional_alltypes.parquet",
],
table_name=None,
)

assert any("ibis_read_parquet" in t for t in con.list_tables())
assert table.count().execute()


# TODO: modify test to use read_in_memory when implemented xref: 8858
@pytest.mark.notimpl(["datafusion"])
@pytest.mark.notyet(
[
Expand All @@ -287,14 +301,17 @@ def test_register_pandas(con):
pd = pytest.importorskip("pandas")
df = pd.DataFrame({"x": [1, 2, 3], "y": ["a", "b", "c"]})

t = con.register(df)
with pytest.warns(FutureWarning, match="v9.1"):
t = con.register(df)
assert t.x.sum().execute() == 6

t = con.register(df, "my_table")
with pytest.warns(FutureWarning, match="v9.1"):
t = con.register(df, "my_table")
assert t.op().name == "my_table"
assert t.x.sum().execute() == 6


# TODO: modify test to use read_in_memory when implemented xref: 8858
@pytest.mark.notimpl(["datafusion", "polars"])
@pytest.mark.notyet(
[
Expand All @@ -318,7 +335,8 @@ def test_register_pyarrow_tables(con):
pa = pytest.importorskip("pyarrow")
pa_t = pa.Table.from_pydict({"x": [1, 2, 3], "y": ["a", "b", "c"]})

t = con.register(pa_t)
with pytest.warns(FutureWarning, match="v9.1"):
t = con.register(pa_t)
assert t.x.sum().execute() == 6


Expand Down Expand Up @@ -351,8 +369,9 @@ def test_csv_reregister_schema(con, tmp_path):
]
)

# For a full file scan, expect correct schema based on final row
foo_table = con.register(foo, table_name="same")
with pytest.warns(FutureWarning, match="v9.1"):
# For a full file scan, expect correct schema based on final row
foo_table = con.register(foo, table_name="same")
result_schema = foo_table.schema()

assert result_schema.names == ("cola", "colb", "colc")
Expand Down
Loading