Skip to content

Commit

Permalink
ci(backends): run backend doctests in CI (ibis-project#9970)
Browse files (browse the repository at this point in the history)
Co-authored-by: Guilherme Martins Crocetti <24530683+gmcrocetti@users.noreply.github.com>
  • Loading branch information
cpcloud and gmcrocetti authored Sep 11, 2024
1 parent a205ab7 commit bac76ff
Show file tree
Hide file tree
Showing 19 changed files with 272 additions and 125 deletions.
8 changes: 8 additions & 0 deletions .github/workflows/ibis-backends.yml
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,14 @@ jobs:
FLINK_REMOTE_CLUSTER_PORT: "8081"
IBIS_EXAMPLES_DATA: ${{ runner.temp }}/examples-${{ matrix.backend.name }}-${{ matrix.os }}-${{ steps.install_python.outputs.python-version }}

- name: "run backend doctests: ${{ matrix.backend.name }}"
if: matrix.os == 'ubuntu-latest'
run: just backend-doctests ${{ matrix.backend.name }}
env:
FLINK_REMOTE_CLUSTER_ADDR: localhost
FLINK_REMOTE_CLUSTER_PORT: "8081"
IBIS_EXAMPLES_DATA: ${{ runner.temp }}/examples-${{ matrix.backend.name }}-${{ matrix.os }}-${{ steps.install_python.outputs.python-version }}

- name: check that no untracked files were produced
shell: bash
run: |
Expand Down
3 changes: 1 addition & 2 deletions ibis/backends/clickhouse/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,7 @@ def do_connect(
>>> import ibis
>>> client = ibis.clickhouse.connect()
>>> client
<ibis.clickhouse.client.ClickhouseClient object at 0x...>
<ibis.backends.clickhouse.Backend object at 0x...>
"""
if settings is None:
settings = {}
Expand Down
11 changes: 5 additions & 6 deletions ibis/backends/dask/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,12 @@ def do_connect(
Examples
--------
>>> import ibis
>>> import pandas as pd
>>> import dask.dataframe as dd
>>> data = {
... "t": dd.read_parquet("path/to/file.parquet"),
... "s": dd.read_csv("path/to/file.csv"),
... }
>>> ibis.dask.connect(data)
>>> ibis.dask.connect(
... {"t": dd.from_pandas(pd.DataFrame({"a": [1, 2, 3]}), npartitions=1)}
... ) # doctest: +ELLIPSIS
<ibis.backends.dask.Backend object at 0x...>
"""
super().do_connect(dictionary)

Expand Down
72 changes: 31 additions & 41 deletions ibis/backends/datafusion/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,25 @@ def do_connect(
Examples
--------
>>> import ibis
>>> config = {"t": "path/to/file.parquet", "s": "path/to/file.csv"}
>>> ibis.datafusion.connect(config)
>>> config = {
... "astronauts": "ci/ibis-testing-data/parquet/astronauts.parquet",
... "diamonds": "ci/ibis-testing-data/csv/diamonds.csv",
... }
>>> con = ibis.datafusion.connect(config)
>>> con.list_tables()
['astronauts', 'diamonds']
>>> con.table("diamonds")
DatabaseTable: diamonds
carat float64
cut string
color string
clarity string
depth float64
table float64
price int64
x float64
y float64
z float64
"""
if isinstance(config, SessionContext):
(self.con, config) = (config, None)
Expand All @@ -121,7 +137,7 @@ def do_connect(
config = {}

for name, path in config.items():
self.register(path, table_name=name)
self._register(path, table_name=name)

@util.experimental
@classmethod
Expand Down Expand Up @@ -300,8 +316,11 @@ def list_tables(
sg.select("table_name")
.from_("information_schema.tables")
.where(sg.column("table_schema").eq(sge.convert(database)))
.order_by("table_name")
)
return self._filter_with_like(
self.raw_sql(query).to_pydict()["table_name"], like
)
return self.raw_sql(query).to_pydict()["table_name"]

def get_schema(
self,
Expand Down Expand Up @@ -333,43 +352,14 @@ def register(
table_name: str | None = None,
**kwargs: Any,
) -> ir.Table:
"""Register a data set with `table_name` located at `source`.
Parameters
----------
source
The data source(s). May be a path to a file or directory of
parquet/csv files, a pandas dataframe, or a pyarrow table, dataset
or record batch.
table_name
The name of the table
kwargs
DataFusion-specific keyword arguments
Examples
--------
Register a csv:
return self._register(source, table_name, **kwargs)

>>> import ibis
>>> conn = ibis.datafusion.connect(config)
>>> conn.register("path/to/data.csv", "my_table")
>>> conn.table("my_table")
Register a PyArrow table:
>>> import pyarrow as pa
>>> tab = pa.table({"x": [1, 2, 3]})
>>> conn.register(tab, "my_table")
>>> conn.table("my_table")
Register a PyArrow dataset:
>>> import pyarrow.dataset as ds
>>> dataset = ds.dataset("path/to/table")
>>> conn.register(dataset, "my_table")
>>> conn.table("my_table")
"""
def _register(
self,
source: str | Path | pa.Table | pa.RecordBatch | pa.Dataset | pd.DataFrame,
table_name: str | None = None,
**kwargs: Any,
) -> ir.Table:
import pandas as pd

if isinstance(source, (str, Path)):
Expand Down
8 changes: 2 additions & 6 deletions ibis/backends/datafusion/tests/test_connect.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,13 @@ def test_none_config():

def test_str_config(name_to_path):
config = {name: str(path) for name, path in name_to_path.items()}
# if path.endswith((".parquet", ".csv", ".csv.gz")) connect triggers register
with pytest.warns(FutureWarning, match="v9.1"):
conn = ibis.datafusion.connect(config)
conn = ibis.datafusion.connect(config)
assert sorted(conn.list_tables()) == sorted(name_to_path)


def test_path_config(name_to_path):
config = name_to_path
# if path.endswith((".parquet", ".csv", ".csv.gz")) connect triggers register
with pytest.warns(FutureWarning, match="v9.1"):
conn = ibis.datafusion.connect(config)
conn = ibis.datafusion.connect(config)
assert sorted(conn.list_tables()) == sorted(name_to_path)


Expand Down
27 changes: 26 additions & 1 deletion ibis/backends/druid/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,32 @@ def current_database(self) -> str:
return "druid"

def do_connect(self, **kwargs: Any) -> None:
"""Create an Ibis client using the passed connection parameters."""
"""Create an Ibis client using the passed connection parameters.
Examples
--------
>>> import ibis
>>> con = ibis.connect("druid://localhost:8082/druid/v2/sql?header=true")
>>> con.list_tables() # doctest: +ELLIPSIS
[...]
>>> t = con.table("functional_alltypes")
>>> t
DatabaseTable: functional_alltypes
__time timestamp
id int64
bool_col int64
tinyint_col int64
smallint_col int64
int_col int64
bigint_col int64
float_col float64
double_col float64
date_string_col string
string_col string
timestamp_col int64
year int64
month int64
"""
header = kwargs.pop("header", True)
self.con = pydruid.db.connect(**kwargs, header=header)

Expand Down
25 changes: 15 additions & 10 deletions ibis/backends/duckdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,9 +393,8 @@ def do_connect(
Examples
--------
>>> import ibis
>>> ibis.duckdb.connect("database.ddb", threads=4, memory_limit="1GB")
<ibis.backends.duckdb.Backend object at ...>
>>> ibis.duckdb.connect(threads=4, memory_limit="1GB") # doctest: +ELLIPSIS
<ibis.backends.duckdb.Backend object at 0x...>
"""
if not isinstance(database, Path) and not database.startswith(
("md:", "motherduck:", ":memory:")
Expand Down Expand Up @@ -1037,9 +1036,8 @@ def list_tables(
>>> con.create_database("my_database")
>>> con.list_tables(database="my_database")
[]
>>> with con.begin() as c:
... c.exec_driver_sql("CREATE TABLE my_database.baz (a INTEGER)") # doctest: +ELLIPSIS
<...>
>>> con.raw_sql("CREATE TABLE my_database.baz (a INTEGER)") # doctest: +ELLIPSIS
<duckdb.duckdb.DuckDBPyConnection object at 0x...>
>>> con.list_tables(database="my_database")
['baz']
Expand Down Expand Up @@ -1312,17 +1310,24 @@ def register_filesystem(self, filesystem: AbstractFileSystem):
--------
>>> import ibis
>>> import fsspec
>>> ibis.options.interactive = True
>>> gcs = fsspec.filesystem("gcs")
>>> con = ibis.duckdb.connect()
>>> con.register_filesystem(gcs)
>>> t = con.read_csv(
... "gcs://ibis-examples/data/band_members.csv.gz",
... table_name="band_members",
... )
DatabaseTable: band_members
name string
band string
>>> t
┏━━━━━━━━┳━━━━━━━━━┓
┃ name ┃ band ┃
┡━━━━━━━━╇━━━━━━━━━┩
│ string │ string │
├────────┼─────────┤
│ Mick │ Stones │
│ John │ Beatles │
│ Paul │ Beatles │
└────────┴─────────┘
"""
self.con.register_filesystem(filesystem)

Expand Down
27 changes: 27 additions & 0 deletions ibis/backends/exasol/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,33 @@ def do_connect(
kwargs
Additional keyword arguments passed to `pyexasol.connect`.
Examples
--------
>>> import os
>>> import ibis
>>> host = os.environ.get("IBIS_TEST_EXASOL_HOST", "localhost")
>>> user = os.environ.get("IBIS_TEST_EXASOL_USER", "sys")
>>> password = os.environ.get("IBIS_TEST_EXASOL_PASSWORD", "exasol")
>>> schema = os.environ.get("IBIS_TEST_EXASOL_DATABASE", "EXASOL")
>>> con = ibis.exasol.connect(schema=schema, host=host, user=user, password=password)
>>> con.list_tables() # doctest: +ELLIPSIS
[...]
>>> t = con.table("functional_alltypes")
>>> t
DatabaseTable: functional_alltypes
id int32
bool_col boolean
tinyint_col int16
smallint_col int16
int_col int32
bigint_col int64
float_col float64
double_col float64
date_string_col string
string_col string
timestamp_col timestamp(3)
year int32
month int32
"""
if kwargs.pop("quote_ident", None) is not None:
raise com.UnsupportedArgumentError(
Expand Down
5 changes: 2 additions & 3 deletions ibis/backends/flink/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,8 @@ def do_connect(self, table_env: TableEnvironment) -> None:
>>> import ibis
>>> from pyflink.table import EnvironmentSettings, TableEnvironment
>>> table_env = TableEnvironment.create(EnvironmentSettings.in_streaming_mode())
>>> ibis.flink.connect(table_env)
<ibis.backends.flink.Backend at 0x...>
>>> ibis.flink.connect(table_env) # doctest: +ELLIPSIS
<ibis.backends.flink.Backend object at 0x...>
"""
self._table_env = table_env

Expand Down
35 changes: 35 additions & 0 deletions ibis/backends/mssql/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,41 @@ def do_connect(
See https://learn.microsoft.com/en-us/sql/connect/odbc/windows/system-requirements-installation-and-driver-files
kwargs
Additional keyword arguments to pass to PyODBC.
Examples
--------
>>> import os
>>> import ibis
>>> host = os.environ.get("IBIS_TEST_MSSQL_HOST", "localhost")
>>> user = os.environ.get("IBIS_TEST_MSSQL_USER", "sa")
>>> password = os.environ.get("IBIS_TEST_MSSQL_PASSWORD", "1bis_Testing!")
>>> database = os.environ.get("IBIS_TEST_MSSQL_DATABASE", "ibis_testing")
>>> driver = os.environ.get("IBIS_TEST_MSSQL_PYODBC_DRIVER", "FreeTDS")
>>> con = ibis.mssql.connect(
... database=database,
... host=host,
... user=user,
... password=password,
... driver=driver,
... )
>>> con.list_tables() # doctest: +ELLIPSIS
[...]
>>> t = con.table("functional_alltypes")
>>> t
DatabaseTable: functional_alltypes
id int32
bool_col boolean
tinyint_col int16
smallint_col int16
int_col int32
bigint_col int64
float_col float32
double_col float64
date_string_col string
string_col string
timestamp_col timestamp(7)
year int32
month int32
"""

# If no user/password given, assume Windows Integrated Authentication
Expand Down
39 changes: 18 additions & 21 deletions ibis/backends/mysql/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,33 +123,30 @@ def do_connect(
Examples
--------
>>> import os
>>> import getpass
>>> import ibis
>>> host = os.environ.get("IBIS_TEST_MYSQL_HOST", "localhost")
>>> user = os.environ.get("IBIS_TEST_MYSQL_USER", getpass.getuser())
>>> password = os.environ.get("IBIS_TEST_MYSQL_PASSWORD")
>>> user = os.environ.get("IBIS_TEST_MYSQL_USER", "ibis")
>>> password = os.environ.get("IBIS_TEST_MYSQL_PASSWORD", "ibis")
>>> database = os.environ.get("IBIS_TEST_MYSQL_DATABASE", "ibis_testing")
>>> con = connect(database=database, host=host, user=user, password=password)
>>> con = ibis.mysql.connect(database=database, host=host, user=user, password=password)
>>> con.list_tables() # doctest: +ELLIPSIS
[...]
>>> t = con.table("functional_alltypes")
>>> t
MySQLTable[table]
name: functional_alltypes
schema:
id : int32
bool_col : int8
tinyint_col : int8
smallint_col : int16
int_col : int32
bigint_col : int64
float_col : float32
double_col : float64
date_string_col : string
string_col : string
timestamp_col : timestamp
year : int32
month : int32
DatabaseTable: functional_alltypes
id int32
bool_col int8
tinyint_col int8
smallint_col int16
int_col int32
bigint_col int64
float_col float32
double_col float64
date_string_col string
string_col string
timestamp_col timestamp
year int32
month int32
"""
self.con = pymysql.connect(
user=user,
Expand Down
Loading

0 comments on commit bac76ff

Please sign in to comment.