Commit

refactor(test): cleanup test data

cpcloud committed Apr 25, 2023
1 parent b0f892f commit 7ae2b24
Showing 50 changed files with 267 additions and 465 deletions.
2 changes: 1 addition & 1 deletion ci/schema/druid.sql
@@ -41,7 +41,7 @@ FROM TABLE(
   EXTERN(
     '{"type":"local","files":["/opt/shared/functional_alltypes.parquet"]}',
     '{"type":"parquet"}',
-    '[{"name":"index","type":"long"},{"name":"Unnamed: 0","type":"long"},{"name":"id","type":"long"},{"name":"bool_col","type":"long"},{"name":"tinyint_col","type":"long"},{"name":"smallint_col","type":"long"},{"name":"int_col","type":"long"},{"name":"bigint_col","type":"long"},{"name":"float_col","type":"double"},{"name":"double_col","type":"double"},{"name":"date_string_col","type":"string"},{"name":"string_col","type":"string"},{"name":"timestamp_col","type":"string"},{"name":"year","type":"long"},{"name":"month","type":"long"}]'
+    '[{"name":"id","type":"long"},{"name":"bool_col","type":"long"},{"name":"tinyint_col","type":"long"},{"name":"smallint_col","type":"long"},{"name":"int_col","type":"long"},{"name":"bigint_col","type":"long"},{"name":"float_col","type":"double"},{"name":"double_col","type":"double"},{"name":"date_string_col","type":"string"},{"name":"string_col","type":"string"},{"name":"timestamp_col","type":"string"},{"name":"year","type":"long"},{"name":"month","type":"long"}]'
   )
 )
 PARTITIONED BY ALL TIME;
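
Note on the dropped columns: `index` and `Unnamed: 0` are the usual artifacts of a pandas CSV round trip with the default `index=True`, which is presumably how they crept into the test data in the first place. A minimal sketch of how the artifact arises and how regenerating the data avoids it (file name illustrative):

```python
import pandas as pd

df = pd.DataFrame({"id": [1, 2], "bool_col": [True, False]})

# The default index=True writes the index as an unlabeled first column,
# which read_csv then surfaces as "Unnamed: 0".
df.to_csv("t.csv")
print(pd.read_csv("t.csv").columns.tolist())  # ['Unnamed: 0', 'id', 'bool_col']

# Writing without the index keeps the on-disk schema clean.
df.to_csv("t.csv", index=False)
print(pd.read_csv("t.csv").columns.tolist())  # ['id', 'bool_col']
```

The schema changes below remove the same two columns from every backend's DDL.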
2 changes: 0 additions & 2 deletions ci/schema/duckdb.sql
@@ -46,8 +46,6 @@ CREATE OR REPLACE TABLE awards_players (
 );
 
 CREATE OR REPLACE TABLE functional_alltypes (
-    "index" BIGINT,
-    "Unnamed: 0" BIGINT,
     id INTEGER,
     bool_col BOOLEAN,
     tinyint_col SMALLINT,
4 changes: 0 additions & 4 deletions ci/schema/mssql.sql
@@ -70,8 +70,6 @@ WITH (FORMAT = 'CSV', FIELDTERMINATOR = ',', ROWTERMINATOR = '\n', FIRSTROW = 2)
 DROP TABLE IF EXISTS functional_alltypes;
 
 CREATE TABLE functional_alltypes (
-    "index" BIGINT,
-    "Unnamed: 0" BIGINT,
     id INTEGER,
     bool_col BIT,
     tinyint_col SMALLINT,
@@ -91,8 +89,6 @@ BULK INSERT functional_alltypes
 FROM '/data/functional_alltypes.csv'
 WITH (FORMAT = 'CSV', FIELDTERMINATOR = ',', ROWTERMINATOR = '\n', FIRSTROW = 2)
 
-CREATE INDEX "ix_functional_alltypes_index" ON functional_alltypes ("index");
-
 DROP TABLE IF EXISTS win;
 
 CREATE TABLE win (g VARCHAR(MAX), x BIGINT, y BIGINT);
4 changes: 0 additions & 4 deletions ci/schema/mysql.sql
@@ -54,8 +54,6 @@ CREATE TABLE awards_players (
 DROP TABLE IF EXISTS functional_alltypes;
 
 CREATE TABLE functional_alltypes (
-    `index` BIGINT,
-    `Unnamed: 0` BIGINT,
     id INTEGER,
     bool_col BOOLEAN,
     tinyint_col TINYINT,
@@ -71,8 +69,6 @@ CREATE TABLE functional_alltypes (
     month INTEGER
 ) DEFAULT CHARACTER SET = utf8;
 
-CREATE INDEX `ix_functional_alltypes_index` ON functional_alltypes (`index`);
-
 DROP TABLE IF EXISTS json_t CASCADE;
 
 CREATE TABLE IF NOT EXISTS json_t (js JSON);
4 changes: 0 additions & 4 deletions ci/schema/postgresql.sql
@@ -63,8 +63,6 @@ CREATE TABLE awards_players (
 DROP TABLE IF EXISTS functional_alltypes CASCADE;
 
 CREATE TABLE functional_alltypes (
-    "index" BIGINT,
-    "Unnamed: 0" BIGINT,
     id INTEGER,
     bool_col BOOLEAN,
     tinyint_col SMALLINT,
@@ -80,8 +78,6 @@ CREATE TABLE functional_alltypes (
     month INTEGER
 );
 
-CREATE INDEX "ix_functional_alltypes_index" ON functional_alltypes ("index");
-
 DROP TABLE IF EXISTS tzone CASCADE;
 
 CREATE TABLE tzone (
2 changes: 0 additions & 2 deletions ci/schema/snowflake.sql
@@ -54,8 +54,6 @@ CREATE OR REPLACE TABLE awards_players (
 );
 
 CREATE OR REPLACE TABLE functional_alltypes (
-    "index" BIGINT,
-    "Unnamed: 0" BIGINT,
     "id" INTEGER,
     "bool_col" BOOLEAN,
     "tinyint_col" SMALLINT,
4 changes: 0 additions & 4 deletions ci/schema/sqlite.sql
@@ -1,8 +1,6 @@
 DROP TABLE IF EXISTS functional_alltypes;
 
 CREATE TABLE functional_alltypes (
-    "index" BIGINT,
-    "Unnamed: 0" BIGINT,
     id BIGINT,
     bool_col BOOLEAN,
     tinyint_col BIGINT,
@@ -19,8 +17,6 @@ CREATE TABLE functional_alltypes (
     CHECK (bool_col IN (0, 1))
 );
 
-CREATE INDEX ix_functional_alltypes_index ON "functional_alltypes" ("index");
-
 DROP TABLE IF EXISTS awards_players;
 
 CREATE TABLE awards_players (
2 changes: 2 additions & 0 deletions ibis/backends/base/__init__.py
@@ -456,6 +456,8 @@ class BaseBackend(abc.ABC, _FileIOHandler):
     table_class: type[ops.DatabaseTable] = ops.DatabaseTable
     name: ClassVar[str]
 
+    supports_temporary_tables = False
+
     def __init__(self, *args, **kwargs):
         self._con_args: tuple[Any] = args
         self._con_kwargs: dict[str, Any] = kwargs
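
Because `supports_temporary_tables` is a plain class attribute on `BaseBackend`, a backend opts in with a one-line override and callers can branch on it. A hypothetical sketch of how test code might consume the flag (the helper is illustrative, not part of this diff):

```python
import pytest

def skip_if_no_temp_tables(con):
    # `con` is a connected ibis backend; the attribute defaults to
    # False on BaseBackend, so backends that never opt in are skipped.
    if not con.supports_temporary_tables:
        pytest.skip(f"{con.name} does not support temporary tables")
```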
52 changes: 18 additions & 34 deletions ibis/backends/bigquery/tests/conftest.py
@@ -178,34 +178,17 @@ def _load_data(data_dir: Path, script_dir: Path, **_: Any) -> None:
             )
         )
 
-        futures.append(
-            e.submit(
-                make_job,
-                client.load_table_from_file,
-                io.BytesIO(data_dir.joinpath("struct_table.avro").read_bytes()),
-                bq.TableReference(testing_dataset, "struct_table"),
-                job_config=bq.LoadJobConfig(
-                    write_disposition=write_disposition,
-                    source_format=bq.SourceFormat.AVRO,
-                ),
-            )
-        )
-
         futures.append(
             e.submit(
                 make_job,
                 client.load_table_from_file,
                 io.BytesIO(
-                    data_dir.joinpath("functional_alltypes.csv").read_bytes()
+                    data_dir.joinpath("avro", "struct_table.avro").read_bytes()
                 ),
-                functional_alltypes_parted,
+                bq.TableReference(testing_dataset, "struct_table"),
                 job_config=bq.LoadJobConfig(
-                    schema=ibis_schema_to_bq_schema(
-                        TEST_TABLES["functional_alltypes"]
-                    ),
                     write_disposition=write_disposition,
-                    source_format=bq.SourceFormat.CSV,
-                    skip_leading_rows=1,
+                    source_format=bq.SourceFormat.AVRO,
                 ),
             )
         )
@@ -264,21 +247,22 @@ def _load_data(data_dir: Path, script_dir: Path, **_: Any) -> None:
             )
         )
 
-        for table, schema in TEST_TABLES.items():
-            futures.append(
-                e.submit(
-                    make_job,
-                    client.load_table_from_file,
-                    io.BytesIO(data_dir.joinpath(f"{table}.csv").read_bytes()),
-                    bq.TableReference(testing_dataset, table),
-                    job_config=bq.LoadJobConfig(
-                        schema=ibis_schema_to_bq_schema(schema),
-                        write_disposition=bq.WriteDisposition.WRITE_TRUNCATE,
-                        source_format=bq.SourceFormat.CSV,
-                        skip_leading_rows=1,
-                    ),
-                )
-            )
+        futures.extend(
+            e.submit(
+                make_job,
+                client.load_table_from_file,
+                io.BytesIO(
+                    data_dir.joinpath("parquet", f"{table}.parquet").read_bytes()
+                ),
+                bq.TableReference(testing_dataset, table),
+                job_config=bq.LoadJobConfig(
+                    schema=ibis_schema_to_bq_schema(schema),
+                    write_disposition=write_disposition,
+                    source_format=bq.SourceFormat.PARQUET,
+                ),
+            )
+            for table, schema in TEST_TABLES.items()
+        )
 
         for fut in concurrent.futures.as_completed(futures):
             fut.result()
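
The loader refactor above replaces an explicit `for` loop of `futures.append(...)` calls with one `futures.extend(...)` over a generator expression; both submit one load job per entry in `TEST_TABLES`, the generator form just reads as a single statement. A stripped-down sketch of the pattern, with a stand-in for the real load job:

```python
import concurrent.futures

def load(table: str) -> str:
    # Stand-in for client.load_table_from_file(...).
    return f"loaded {table}"

tables = ["functional_alltypes", "batting", "awards_players", "diamonds"]

with concurrent.futures.ThreadPoolExecutor() as e:
    futures = []
    # extend() consumes the generator eagerly, so every submit() happens
    # up front and the jobs run concurrently.
    futures.extend(e.submit(load, t) for t in tables)
    for fut in concurrent.futures.as_completed(futures):
        print(fut.result())
```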
3 changes: 3 additions & 0 deletions ibis/backends/clickhouse/__init__.py
@@ -74,6 +74,9 @@ def insert(self, obj, **kwargs):
 class Backend(BaseBackend):
     name = 'clickhouse'
 
+    # ClickHouse itself does, but the client driver does not
+    supports_temporary_tables = False
+
     class Options(ibis.config.Config):
         """Clickhouse options.
15 changes: 9 additions & 6 deletions ibis/backends/clickhouse/tests/conftest.py
@@ -38,15 +38,18 @@ def native_bool(self) -> bool:
 
     @classmethod
     def service_spec(cls, data_dir: Path) -> ServiceSpec:
-        files = [data_dir.joinpath("functional_alltypes.parquet")]
-        files.extend(
-            data_dir.joinpath("parquet", name, f"{name}.parquet")
-            for name in ("diamonds", "batting", "awards_players")
-        )
         return ServiceSpec(
             name=cls.name(),
             data_volume="/var/lib/clickhouse/user_files/ibis",
-            files=files,
+            files=[
+                data_dir.joinpath("parquet", f"{name}.parquet")
+                for name in (
+                    "diamonds",
+                    "batting",
+                    "awards_players",
+                    "functional_alltypes",
+                )
+            ],
         )
 
     @staticmethod
1 change: 0 additions & 1 deletion ibis/backends/clickhouse/tests/test_functions.py
@@ -42,7 +42,6 @@ def test_cast_string_col(alltypes, translate, to_type, snapshot):
 @pytest.mark.parametrize(
     'column',
     [
-        'index',
         'id',
         'bool_col',
         'tinyint_col',
2 changes: 0 additions & 2 deletions ibis/backends/conftest.py
@@ -34,8 +34,6 @@
 TEST_TABLES = {
     "functional_alltypes": ibis.schema(
         {
-            "index": "int64",
-            "Unnamed: 0": "int64",
             "id": "int32",
             "bool_col": "boolean",
             "tinyint_col": "int8",
26 changes: 7 additions & 19 deletions ibis/backends/dask/tests/conftest.py
@@ -3,7 +3,6 @@
 from pathlib import Path
 from typing import Any
 
-import numpy as np
 import pandas as pd
 import pandas.testing as tm
 import pytest
@@ -33,34 +32,23 @@ def connect(data_directory: Path):
         return ibis.dask.connect(
             {
                 "functional_alltypes": dd.from_pandas(
-                    pd.read_csv(
-                        data_directory / "functional_alltypes.csv",
-                        index_col=None,
-                        dtype={
-                            "bool_col": bool,
-                            "string_col": str,
-                            "tinyint_col": np.int8,
-                            "smallint_col": np.int16,
-                            "int_col": np.int32,
-                            "bigint_col": np.int64,
-                            "float_col": np.float32,
-                            "double_col": np.float64,
-                        },
-                        parse_dates=["timestamp_col"],
-                        encoding="utf-8",
+                    pd.read_parquet(
+                        data_directory / "parquet" / "functional_alltypes.parquet"
                     ),
                     npartitions=NPARTITIONS,
                 ),
                 "batting": dd.from_pandas(
-                    pd.read_csv(data_directory / "batting.csv"),
+                    pd.read_parquet(data_directory / "parquet" / "batting.parquet"),
                     npartitions=NPARTITIONS,
                 ),
                 "awards_players": dd.from_pandas(
-                    pd.read_csv(data_directory / "awards_players.csv"),
+                    pd.read_parquet(
+                        data_directory / "parquet" / "awards_players.parquet"
+                    ),
                     npartitions=NPARTITIONS,
                 ),
                 'diamonds': dd.from_pandas(
-                    pd.read_csv(str(data_directory / 'diamonds.csv')),
+                    pd.read_parquet(data_directory / "parquet" / "diamonds.parquet"),
                     npartitions=NPARTITIONS,
                 ),
                 'json_t': dd.from_pandas(
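
Switching these fixtures from `pd.read_csv` to `pd.read_parquet` drops the hand-maintained dtype map and `parse_dates` list because parquet stores column types in the file itself. Dask can also skip the pandas hop entirely; a sketch under the same data layout (path illustrative):

```python
import dask.dataframe as dd

# dd.read_parquet builds a lazy dask DataFrame straight from the file,
# whereas dd.from_pandas(pd.read_parquet(...), npartitions=...) first
# materializes the whole table in pandas and then partitions it.
batting = dd.read_parquet("data/parquet/batting.parquet")
sample = batting.sample(frac=0.01).reset_index(drop=True)
```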
11 changes: 2 additions & 9 deletions ibis/backends/dask/tests/execution/conftest.py
@@ -63,20 +63,13 @@ def df(npartitions):
 
 @pytest.fixture(scope='module')
 def batting_df(data_directory):
-    df = dd.read_csv(
-        data_directory / 'batting.csv',
-        assume_missing=True,
-        dtype={"lgID": "object"},
-    )
+    df = dd.read_parquet(data_directory / 'parquet' / 'batting.parquet')
     return df.sample(frac=0.01).reset_index(drop=True)
 
 
 @pytest.fixture(scope='module')
 def awards_players_df(data_directory):
-    return dd.read_csv(
-        data_directory / 'awards_players.csv',
-        assume_missing=True,
-    )
+    return dd.read_parquet(data_directory / 'parquet' / 'awards_players.parquet')
 
 
 @pytest.fixture(scope='module')
15 changes: 9 additions & 6 deletions ibis/backends/datafusion/tests/conftest.py
@@ -28,12 +28,10 @@ def connect(data_directory: Path):
         # csv file path
         client = ibis.datafusion.connect({})
         client.register(
-            data_directory / 'functional_alltypes.csv',
+            data_directory / "csv" / 'functional_alltypes.csv',
             table_name='functional_alltypes',
             schema=pa.schema(
                 [
-                    ('index', 'int64'),
-                    ('Unnamed 0', 'int64'),
                     ('id', 'int64'),
                     ('bool_col', 'int8'),
                     ('tinyint_col', 'int8'),
@@ -50,11 +48,16 @@ def connect(data_directory: Path):
                 ]
             ),
         )
-        client.register(data_directory / 'batting.csv', table_name='batting')
         client.register(
-            data_directory / 'awards_players.csv', table_name='awards_players'
+            data_directory / "parquet" / 'batting.parquet', table_name='batting'
         )
+        client.register(
+            data_directory / "parquet" / 'awards_players.parquet',
+            table_name='awards_players',
+        )
+        client.register(
+            data_directory / "parquet" / 'diamonds.parquet', table_name='diamonds'
+        )
-        client.register(data_directory / 'diamonds.csv', table_name='diamonds')
         return client
 
     @property
9 changes: 4 additions & 5 deletions ibis/backends/druid/tests/conftest.py
@@ -99,11 +99,10 @@ class TestConf(ServiceBackendTest, RoundHalfToEven):
 
     @classmethod
    def service_spec(cls, data_dir: Path):
-        files = [data_dir.joinpath("functional_alltypes.parquet")]
-        files.extend(
-            data_dir.joinpath("parquet", name, f"{name}.parquet")
-            for name in ("diamonds", "batting", "awards_players")
-        )
+        files = [
+            data_dir.joinpath("parquet", f"{name}.parquet")
+            for name in ("diamonds", "batting", "awards_players", "functional_alltypes")
+        ]
         return ServiceSpec(
             name="druid-coordinator", data_volume="/opt/shared", files=files
         )
13 changes: 6 additions & 7 deletions ibis/backends/duckdb/tests/conftest.py
@@ -19,23 +19,22 @@ class TestConf(BackendTest, RoundAwayFromZero):
     def __init__(self, data_directory: Path, **kwargs: Any) -> None:
         self.connection = self.connect(data_directory, **kwargs)
 
-        script_dir = data_directory.parent
-
-        schema = (script_dir / 'schema' / 'duckdb.sql').read_text()
-
         if not SANDBOXED:
             self.connection._load_extensions(
                 ["httpfs", "postgres_scanner", "sqlite_scanner"]
             )
 
+        script_dir = data_directory.parent
+        schema = script_dir.joinpath("schema", "duckdb.sql").read_text()
+
         with self.connection.begin() as con:
-            for stmt in filter(None, map(str.strip, schema.split(';'))):
+            for stmt in filter(None, map(str.strip, schema.split(";"))):
                 con.exec_driver_sql(stmt)
 
             for table in TEST_TABLES:
-                src = data_directory / f'{table}.csv'
+                src = data_directory / "csv" / f"{table}.csv"
                 con.exec_driver_sql(
-                    f"COPY {table} FROM {str(src)!r} (DELIMITER ',', HEADER, SAMPLE_SIZE 1)"
+                    f"COPY {table} FROM {str(src)!r} (DELIMITER ',', HEADER)"
                 )
 
     @staticmethod
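
The DuckDB loader also drops `SAMPLE_SIZE 1` from the `COPY` options. That option appears to matter only for CSV type sniffing, and here each table is created from the schema file before the copy, so the declared column types win anyway. A self-contained sketch of the same load path (table and path illustrative):

```python
import duckdb

con = duckdb.connect()
con.execute("CREATE TABLE t (id INTEGER, name VARCHAR)")
# HEADER skips the header row; column types come from the declared
# schema, so no sniffing options are needed.
con.execute("COPY t FROM 'data/csv/t.csv' (DELIMITER ',', HEADER)")
```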