Skip to content

Commit

Permalink
refactor(tests): clean up backend test setup to make non-data-loading steps atomic
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud authored and kszucs committed Jul 17, 2023
1 parent 50f3be9 commit 16b4632
Show file tree
Hide file tree
Showing 26 changed files with 633 additions and 896 deletions.
File renamed without changes.
21 changes: 11 additions & 10 deletions ibis/backends/bigquery/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,6 @@
from ibis.backends.tests.data import json_types, non_null_array_types, struct_types, win

if TYPE_CHECKING:
import pathlib
from pathlib import Path

import ibis.expr.types as ir

DATASET_ID = "ibis_gbq_testing"
Expand Down Expand Up @@ -83,13 +80,13 @@ class TestConf(UnorderedComparator, BackendTest, RoundAwayFromZero):
supports_structs = True
supports_json = True
check_names = False
deps = ("google.cloud.bigquery",)

@staticmethod
def format_table(name: str) -> str:
    """Return ``name`` qualified by the test dataset, as ``<DATASET_ID>.<name>``."""
    return "{}.{}".format(DATASET_ID, name)

@staticmethod
def _load_data(data_dir: Path, script_dir: Path, **_: Any) -> None:
def _load_data(self, **_: Any) -> None:
"""Load test data into a BigQuery instance."""

credentials, default_project_id = google.auth.default(
Expand Down Expand Up @@ -198,7 +195,7 @@ def _load_data(data_dir: Path, script_dir: Path, **_: Any) -> None:
make_job,
client.load_table_from_file,
io.BytesIO(
data_dir.joinpath("avro", "struct_table.avro").read_bytes()
self.data_dir.joinpath("avro", "struct_table.avro").read_bytes()
),
bq.TableReference(testing_dataset, "struct_table"),
job_config=bq.LoadJobConfig(
Expand Down Expand Up @@ -267,7 +264,9 @@ def _load_data(data_dir: Path, script_dir: Path, **_: Any) -> None:
make_job,
client.load_table_from_file,
io.BytesIO(
data_dir.joinpath("parquet", f"{table}.parquet").read_bytes()
self.data_dir.joinpath(
"parquet", f"{table}.parquet"
).read_bytes()
),
bq.TableReference(testing_dataset, table),
job_config=bq.LoadJobConfig(
Expand All @@ -286,7 +285,9 @@ def _load_data(data_dir: Path, script_dir: Path, **_: Any) -> None:
make_job,
client.load_table_from_file,
io.BytesIO(
data_dir.joinpath("parquet", f"{table}.parquet").read_bytes()
self.data_dir.joinpath(
"parquet", f"{table}.parquet"
).read_bytes()
),
bq.TableReference(testing_dataset_tokyo, table),
job_config=bq.LoadJobConfig(
Expand All @@ -307,7 +308,7 @@ def functional_alltypes(self) -> ir.Table:
return t.select(~s.c("index", "Unnamed_0"))

@staticmethod
def connect(data_directory: pathlib.Path) -> Backend:
def connect(*, tmpdir, worker_id, **kw) -> Backend:
"""Connect to the test project and dataset."""
credentials, default_project_id = google.auth.default(
scopes=EXTERNAL_DATA_SCOPES
Expand All @@ -317,7 +318,7 @@ def connect(data_directory: pathlib.Path) -> Backend:
os.environ.get(PROJECT_ID_ENV_VAR, default_project_id) or DEFAULT_PROJECT_ID
)
con = ibis.bigquery.connect(
project_id=project_id, dataset_id=DATASET_ID, credentials=credentials
project_id=project_id, dataset_id=DATASET_ID, credentials=credentials, **kw
)
expr = ibis.literal(1)
try:
Expand Down
76 changes: 29 additions & 47 deletions ibis/backends/clickhouse/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@

import contextlib
import os
from typing import TYPE_CHECKING, Callable
from typing import TYPE_CHECKING, Any, Callable, Iterable

import pytest

import ibis
import ibis.expr.types as ir
from ibis import util
from ibis.backends.tests.base import (
RoundHalfToEven,
ServiceBackendTest,
ServiceSpec,
UnorderedComparator,
)

Expand All @@ -32,28 +32,22 @@ class TestConf(UnorderedComparator, ServiceBackendTest, RoundHalfToEven):
supported_to_timestamp_units = {'s'}
supports_floating_modulus = False
supports_json = False
data_volume = "/var/lib/clickhouse/user_files/ibis"
service_name = "clickhouse"
deps = ("clickhouse_connect",)

@property
def native_bool(self) -> bool:
    """Report whether ``SELECT true`` comes back as a Python ``bool``.

    The query is issued through ``self.connection.con`` and must yield
    exactly one row holding exactly one value; any other shape makes the
    unpacking below raise ``ValueError``.
    """
    rows = self.connection.con.query("SELECT true").result_set
    # Strict destructuring: one row, one column.
    [(cell,)] = rows
    return isinstance(cell, bool)

@classmethod
def service_spec(cls, data_dir: Path) -> ServiceSpec:
return ServiceSpec(
name=cls.name(),
data_volume="/var/lib/clickhouse/user_files/ibis",
files=data_dir.joinpath("parquet").glob("*.parquet"),
)
@property
def test_files(self) -> Iterable[Path]:
    """Return the ``*.parquet`` entries under ``self.data_dir / "parquet"``.

    The value is whatever ``glob`` returns for that directory; no sorting
    or filtering beyond the ``*.parquet`` pattern is applied here.
    """
    parquet_dir = self.data_dir.joinpath("parquet")
    return parquet_dir.glob("*.parquet")

@staticmethod
def _load_data(
data_dir: Path,
script_dir: Path,
host: str = CLICKHOUSE_HOST,
port: int = CLICKHOUSE_PORT,
user: str = CLICKHOUSE_USER,
password: str = CLICKHOUSE_PASS,
self,
*,
database: str = IBIS_TEST_CLICKHOUSE_DB,
**_,
) -> None:
Expand All @@ -66,35 +60,28 @@ def _load_data(
script_dir
Location of scripts defining schemas
"""
cc = pytest.importorskip("clickhouse_connect")

client = cc.get_client(
host=host,
port=port,
user=user,
password=password,
settings={
"allow_experimental_object_type": 1,
"output_format_json_named_tuples_as_objects": 1,
},
)
import clickhouse_connect as cc

con = self.connection
client = con.con

with contextlib.suppress(cc.driver.exceptions.DatabaseError):
client.command(f"CREATE DATABASE {database} ENGINE = Atomic")

with open(script_dir / 'schema' / 'clickhouse.sql') as schema:
for stmt in filter(None, map(str.strip, schema.read().split(";"))):
client.command(stmt)
util.consume(map(client.command, self.ddl_script))

def postload(self, **kw: Any):
    """Swap ``self.connection`` for one opened on the test database.

    Any keyword arguments are forwarded unchanged to ``self.connect``.
    """
    # Reconnecting is what points the connection at the test database.
    test_db_connection = self.connect(database=IBIS_TEST_CLICKHOUSE_DB, **kw)
    self.connection = test_db_connection

@staticmethod
def connect(data_directory: Path):
pytest.importorskip("clickhouse_connect")
def connect(*, tmpdir, worker_id, **kw: Any):
return ibis.clickhouse.connect(
host=CLICKHOUSE_HOST,
port=CLICKHOUSE_PORT,
password=CLICKHOUSE_PASS,
database=IBIS_TEST_CLICKHOUSE_DB,
user=CLICKHOUSE_USER,
**kw,
)

@staticmethod
Expand All @@ -114,27 +101,22 @@ def least(f: Callable[..., ir.Value], *args: ir.Value) -> ir.Value:
return f(*args)


@pytest.fixture(scope='module')
def con(tmp_path_factory, data_directory, script_directory, worker_id):
return TestConf.load_data(
data_directory,
script_directory,
tmp_path_factory,
worker_id,
).connect(data_directory)
@pytest.fixture(scope='session')
def con(tmp_path_factory, data_dir, worker_id):
return TestConf.load_data(data_dir, tmp_path_factory, worker_id).connection


@pytest.fixture(scope='module')
@pytest.fixture(scope='session')
def db(con):
return con.database()


@pytest.fixture(scope='module')
def alltypes(db):
return db.functional_alltypes
@pytest.fixture(scope='session')
def alltypes(con):
return con.tables.functional_alltypes


@pytest.fixture(scope='module')
@pytest.fixture(scope='session')
def df(alltypes):
return alltypes.execute()

Expand Down
54 changes: 14 additions & 40 deletions ibis/backends/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import sys
from functools import lru_cache
from pathlib import Path
from typing import Any, TextIO
from typing import Any, Iterable

import _pytest
import numpy as np
Expand Down Expand Up @@ -126,19 +126,7 @@


@pytest.fixture(scope='session')
def script_directory() -> Path:
"""Return the test script directory.
Returns
-------
Path
Test script directory
"""
return Path(__file__).absolute().parents[2] / "ci"


@pytest.fixture(scope='session')
def data_directory() -> Path:
def data_dir() -> Path:
"""Return the test data directory.
Returns
Expand Down Expand Up @@ -178,7 +166,7 @@ def recreate_database(
def init_database(
url: sa.engine.url.URL,
database: str,
schema: TextIO | None = None,
schema: Iterable[str] | None = None,
recreate: bool = True,
isolation_level: str | None = "AUTOCOMMIT",
**kwargs: Any,
Expand Down Expand Up @@ -220,11 +208,7 @@ def init_database(

if schema:
with engine.begin() as conn:
for stmt in filter(
None,
map(str.strip, schema.read().split(';')),
):
conn.exec_driver_sql(stmt)
util.consume(map(conn.exec_driver_sql, schema))

return engine

Expand Down Expand Up @@ -530,11 +514,11 @@ def pytest_runtest_call(item):


@pytest.fixture(params=_get_backends_to_test(), scope='session')
def backend(request, data_directory, script_directory, tmp_path_factory, worker_id):
def backend(request, data_dir, tmp_path_factory, worker_id):
"""Return an instance of BackendTest, loaded with data."""

cls = _get_backend_conf(request.param)
return cls.load_data(data_directory, script_directory, tmp_path_factory, worker_id)
return cls.load_data(data_dir, tmp_path_factory, worker_id)


@pytest.fixture(scope="session")
Expand All @@ -543,9 +527,7 @@ def con(backend):
return backend.connection


def _setup_backend(
request, data_directory, script_directory, tmp_path_factory, worker_id
):
def _setup_backend(request, data_dir, tmp_path_factory, worker_id):
if (backend := request.param) == "duckdb" and platform.system() == "Windows":
pytest.xfail(
"windows prevents two connections to the same duckdb file "
Expand All @@ -554,20 +536,16 @@ def _setup_backend(
return None
else:
cls = _get_backend_conf(backend)
return cls.load_data(
data_directory, script_directory, tmp_path_factory, worker_id
)
return cls.load_data(data_dir, tmp_path_factory, worker_id)


@pytest.fixture(
params=_get_backends_to_test(discard=("dask", "pandas")),
scope='session',
)
def ddl_backend(request, data_directory, script_directory, tmp_path_factory, worker_id):
def ddl_backend(request, data_dir, tmp_path_factory, worker_id):
"""Set up the backends that are SQL-based."""
return _setup_backend(
request, data_directory, script_directory, tmp_path_factory, worker_id
)
return _setup_backend(request, data_dir, tmp_path_factory, worker_id)


@pytest.fixture(scope='session')
Expand All @@ -591,13 +569,9 @@ def ddl_con(ddl_backend):
),
scope='session',
)
def alchemy_backend(
request, data_directory, script_directory, tmp_path_factory, worker_id
):
def alchemy_backend(request, data_dir, tmp_path_factory, worker_id):
"""Set up the SQLAlchemy-based backends."""
return _setup_backend(
request, data_directory, script_directory, tmp_path_factory, worker_id
)
return _setup_backend(request, data_dir, tmp_path_factory, worker_id)


@pytest.fixture(scope='session')
Expand All @@ -610,10 +584,10 @@ def alchemy_con(alchemy_backend):
params=_get_backends_to_test(keep=("dask", "pandas", "pyspark")),
scope='session',
)
def udf_backend(request, data_directory, script_directory, tmp_path_factory, worker_id):
def udf_backend(request, data_dir, tmp_path_factory, worker_id):
"""Runs the UDF-supporting backends."""
cls = _get_backend_conf(request.param)
return cls.load_data(data_directory, script_directory, tmp_path_factory, worker_id)
return cls.load_data(data_dir, tmp_path_factory, worker_id)


@pytest.fixture(scope='session')
Expand Down
Loading

0 comments on commit 16b4632

Please sign in to comment.