diff --git a/ci/schema/druid.sql b/ci/schema/druid.sql
index 941a92fe133b..3c58ff394e24 100644
--- a/ci/schema/druid.sql
+++ b/ci/schema/druid.sql
@@ -41,7 +41,7 @@ FROM TABLE(
   EXTERN(
     '{"type":"local","files":["/opt/shared/functional_alltypes.parquet"]}',
     '{"type":"parquet"}',
-    '[{"name":"index","type":"long"},{"name":"Unnamed: 0","type":"long"},{"name":"id","type":"long"},{"name":"bool_col","type":"long"},{"name":"tinyint_col","type":"long"},{"name":"smallint_col","type":"long"},{"name":"int_col","type":"long"},{"name":"bigint_col","type":"long"},{"name":"float_col","type":"double"},{"name":"double_col","type":"double"},{"name":"date_string_col","type":"string"},{"name":"string_col","type":"string"},{"name":"timestamp_col","type":"string"},{"name":"year","type":"long"},{"name":"month","type":"long"}]'
+    '[{"name":"id","type":"long"},{"name":"bool_col","type":"long"},{"name":"tinyint_col","type":"long"},{"name":"smallint_col","type":"long"},{"name":"int_col","type":"long"},{"name":"bigint_col","type":"long"},{"name":"float_col","type":"double"},{"name":"double_col","type":"double"},{"name":"date_string_col","type":"string"},{"name":"string_col","type":"string"},{"name":"timestamp_col","type":"string"},{"name":"year","type":"long"},{"name":"month","type":"long"}]'
   )
 )
 PARTITIONED BY ALL TIME;
diff --git a/ci/schema/duckdb.sql b/ci/schema/duckdb.sql
index 5743cf07a19f..780b3b0b077d 100644
--- a/ci/schema/duckdb.sql
+++ b/ci/schema/duckdb.sql
@@ -46,8 +46,6 @@ CREATE OR REPLACE TABLE awards_players (
 );
 
 CREATE OR REPLACE TABLE functional_alltypes (
-    "index" BIGINT,
-    "Unnamed: 0" BIGINT,
     id INTEGER,
     bool_col BOOLEAN,
     tinyint_col SMALLINT,
diff --git a/ci/schema/mssql.sql b/ci/schema/mssql.sql
index 258c831450f0..31c73501a389 100644
--- a/ci/schema/mssql.sql
+++ b/ci/schema/mssql.sql
@@ -70,8 +70,6 @@ WITH (FORMAT = 'CSV', FIELDTERMINATOR = ',', ROWTERMINATOR = '\n', FIRSTROW = 2)
 DROP TABLE IF EXISTS functional_alltypes;
 
 CREATE TABLE functional_alltypes (
-    "index" BIGINT,
-    "Unnamed: 0" BIGINT,
     id INTEGER,
     bool_col BIT,
     tinyint_col SMALLINT,
@@ -91,8 +89,6 @@ BULK INSERT functional_alltypes
 FROM '/data/functional_alltypes.csv'
 WITH (FORMAT = 'CSV', FIELDTERMINATOR = ',', ROWTERMINATOR = '\n', FIRSTROW = 2)
 
-CREATE INDEX "ix_functional_alltypes_index" ON functional_alltypes ("index");
-
 DROP TABLE IF EXISTS win;
 
 CREATE TABLE win (g VARCHAR(MAX), x BIGINT, y BIGINT);
diff --git a/ci/schema/mysql.sql b/ci/schema/mysql.sql
index b5a5828e3685..0fec6beddb5d 100644
--- a/ci/schema/mysql.sql
+++ b/ci/schema/mysql.sql
@@ -54,8 +54,6 @@ CREATE TABLE awards_players (
 DROP TABLE IF EXISTS functional_alltypes;
 
 CREATE TABLE functional_alltypes (
-    `index` BIGINT,
-    `Unnamed: 0` BIGINT,
     id INTEGER,
     bool_col BOOLEAN,
     tinyint_col TINYINT,
@@ -71,8 +69,6 @@ CREATE TABLE functional_alltypes (
     month INTEGER
 ) DEFAULT CHARACTER SET = utf8;
 
-CREATE INDEX `ix_functional_alltypes_index` ON functional_alltypes (`index`);
-
 DROP TABLE IF EXISTS json_t CASCADE;
 
 CREATE TABLE IF NOT EXISTS json_t (js JSON);
diff --git a/ci/schema/postgresql.sql b/ci/schema/postgresql.sql
index 0aab33cb1cf5..9cf1b0c4429e 100644
--- a/ci/schema/postgresql.sql
+++ b/ci/schema/postgresql.sql
@@ -63,8 +63,6 @@ CREATE TABLE awards_players (
 DROP TABLE IF EXISTS functional_alltypes CASCADE;
 
 CREATE TABLE functional_alltypes (
-    "index" BIGINT,
-    "Unnamed: 0" BIGINT,
     id INTEGER,
     bool_col BOOLEAN,
     tinyint_col SMALLINT,
@@ -80,8 +78,6 @@ CREATE TABLE functional_alltypes (
     month INTEGER
 );
 
-CREATE INDEX "ix_functional_alltypes_index" ON functional_alltypes ("index");
-
 DROP TABLE IF EXISTS tzone CASCADE;
 
 CREATE TABLE tzone (
diff --git a/ci/schema/snowflake.sql b/ci/schema/snowflake.sql
index 1d3939ac2ad5..bb11c17b2d24 100644
--- a/ci/schema/snowflake.sql
+++ b/ci/schema/snowflake.sql
@@ -54,8 +54,6 @@ CREATE OR REPLACE TABLE awards_players (
 );
 
 CREATE OR REPLACE TABLE functional_alltypes (
-    "index" BIGINT,
-    "Unnamed: 0" BIGINT,
     "id" INTEGER,
     "bool_col" BOOLEAN,
     "tinyint_col" SMALLINT,
diff --git a/ci/schema/sqlite.sql b/ci/schema/sqlite.sql
index 1335f6b48ef3..fce560641cb3 100644
--- a/ci/schema/sqlite.sql
+++ b/ci/schema/sqlite.sql
@@ -1,8 +1,6 @@
 DROP TABLE IF EXISTS functional_alltypes;
 
 CREATE TABLE functional_alltypes (
-    "index" BIGINT,
-    "Unnamed: 0" BIGINT,
     id BIGINT,
     bool_col BOOLEAN,
     tinyint_col BIGINT,
@@ -19,8 +17,6 @@ CREATE TABLE functional_alltypes (
     CHECK (bool_col IN (0, 1))
 );
 
-CREATE INDEX ix_functional_alltypes_index ON "functional_alltypes" ("index");
-
 DROP TABLE IF EXISTS awards_players;
 
 CREATE TABLE awards_players (
diff --git a/ibis/backends/base/__init__.py b/ibis/backends/base/__init__.py
index 269ae95e107c..94d4b18e288f 100644
--- a/ibis/backends/base/__init__.py
+++ b/ibis/backends/base/__init__.py
@@ -456,6 +456,8 @@ class BaseBackend(abc.ABC, _FileIOHandler):
     table_class: type[ops.DatabaseTable] = ops.DatabaseTable
     name: ClassVar[str]
 
+    supports_temporary_tables = False
+
     def __init__(self, *args, **kwargs):
         self._con_args: tuple[Any] = args
         self._con_kwargs: dict[str, Any] = kwargs
diff --git a/ibis/backends/bigquery/tests/conftest.py b/ibis/backends/bigquery/tests/conftest.py
index 8e93a490ba43..26f76a56764c 100644
--- a/ibis/backends/bigquery/tests/conftest.py
+++ b/ibis/backends/bigquery/tests/conftest.py
@@ -178,34 +178,17 @@ def _load_data(data_dir: Path, script_dir: Path, **_: Any) -> None:
                 )
             )
 
-            futures.append(
-                e.submit(
-                    make_job,
-                    client.load_table_from_file,
-                    io.BytesIO(data_dir.joinpath("struct_table.avro").read_bytes()),
-                    bq.TableReference(testing_dataset, "struct_table"),
-                    job_config=bq.LoadJobConfig(
-                        write_disposition=write_disposition,
-                        source_format=bq.SourceFormat.AVRO,
-                    ),
-                )
-            )
-
             futures.append(
                 e.submit(
                     make_job,
                     client.load_table_from_file,
                     io.BytesIO(
-                        data_dir.joinpath("functional_alltypes.csv").read_bytes()
+                        data_dir.joinpath("avro", "struct_table.avro").read_bytes()
                     ),
-                    functional_alltypes_parted,
+                    bq.TableReference(testing_dataset, "struct_table"),
                     job_config=bq.LoadJobConfig(
-                        schema=ibis_schema_to_bq_schema(
-                            TEST_TABLES["functional_alltypes"]
-                        ),
                         write_disposition=write_disposition,
-                        source_format=bq.SourceFormat.CSV,
-                        skip_leading_rows=1,
+                        source_format=bq.SourceFormat.AVRO,
                     ),
                 )
             )
@@ -264,21 +247,22 @@ def _load_data(data_dir: Path, script_dir: Path, **_: Any) -> None:
                 )
             )
 
-        for table, schema in TEST_TABLES.items():
-            futures.append(
-                e.submit(
-                    make_job,
-                    client.load_table_from_file,
-                    io.BytesIO(data_dir.joinpath(f"{table}.csv").read_bytes()),
-                    bq.TableReference(testing_dataset, table),
-                    job_config=bq.LoadJobConfig(
-                        schema=ibis_schema_to_bq_schema(schema),
-                        write_disposition=bq.WriteDisposition.WRITE_TRUNCATE,
-                        source_format=bq.SourceFormat.CSV,
-                        skip_leading_rows=1,
-                    ),
-                )
+        futures.extend(
+            e.submit(
+                make_job,
+                client.load_table_from_file,
+                io.BytesIO(
+                    data_dir.joinpath("parquet", f"{table}.parquet").read_bytes()
+                ),
+                bq.TableReference(testing_dataset, table),
+                job_config=bq.LoadJobConfig(
+                    schema=ibis_schema_to_bq_schema(schema),
+                    write_disposition=write_disposition,
+                    source_format=bq.SourceFormat.PARQUET,
+                ),
             )
+            for table, schema in TEST_TABLES.items()
+        )
 
     for fut in concurrent.futures.as_completed(futures):
         fut.result()
diff --git a/ibis/backends/clickhouse/__init__.py b/ibis/backends/clickhouse/__init__.py
index 1be8dfa0e4d7..9255f61a0443 100644
--- a/ibis/backends/clickhouse/__init__.py
+++ b/ibis/backends/clickhouse/__init__.py
@@ -74,6 +74,9 @@ def insert(self, obj, **kwargs):
 class Backend(BaseBackend):
     name = 'clickhouse'
 
+    # ClickHouse itself does, but the client driver does not
+    supports_temporary_tables = False
+
     class Options(ibis.config.Config):
         """Clickhouse options.
diff --git a/ibis/backends/clickhouse/tests/conftest.py b/ibis/backends/clickhouse/tests/conftest.py
index 90d355e1a22c..22f8e28fa411 100644
--- a/ibis/backends/clickhouse/tests/conftest.py
+++ b/ibis/backends/clickhouse/tests/conftest.py
@@ -38,15 +38,18 @@ def native_bool(self) -> bool:
 
     @classmethod
     def service_spec(cls, data_dir: Path) -> ServiceSpec:
-        files = [data_dir.joinpath("functional_alltypes.parquet")]
-        files.extend(
-            data_dir.joinpath("parquet", name, f"{name}.parquet")
-            for name in ("diamonds", "batting", "awards_players")
-        )
         return ServiceSpec(
             name=cls.name(),
             data_volume="/var/lib/clickhouse/user_files/ibis",
-            files=files,
+            files=[
+                data_dir.joinpath("parquet", f"{name}.parquet")
+                for name in (
+                    "diamonds",
+                    "batting",
+                    "awards_players",
+                    "functional_alltypes",
+                )
+            ],
         )
 
     @staticmethod
diff --git a/ibis/backends/clickhouse/tests/test_functions.py b/ibis/backends/clickhouse/tests/test_functions.py
index aec517cb898e..a3c7bcfff860 100644
--- a/ibis/backends/clickhouse/tests/test_functions.py
+++ b/ibis/backends/clickhouse/tests/test_functions.py
@@ -42,7 +42,6 @@ def test_cast_string_col(alltypes, translate, to_type, snapshot):
 @pytest.mark.parametrize(
     'column',
     [
-        'index',
         'id',
         'bool_col',
         'tinyint_col',
diff --git a/ibis/backends/conftest.py b/ibis/backends/conftest.py
index 7e308f4db8d9..5ba957d00dee 100644
--- a/ibis/backends/conftest.py
+++ b/ibis/backends/conftest.py
@@ -34,8 +34,6 @@
 TEST_TABLES = {
     "functional_alltypes": ibis.schema(
         {
-            "index": "int64",
-            "Unnamed: 0": "int64",
             "id": "int32",
             "bool_col": "boolean",
             "tinyint_col": "int8",
diff --git a/ibis/backends/dask/tests/conftest.py b/ibis/backends/dask/tests/conftest.py
index 2f1f38f6bd34..670b9893afe1 100644
--- a/ibis/backends/dask/tests/conftest.py
+++ b/ibis/backends/dask/tests/conftest.py
@@ -3,7 +3,6 @@
 from pathlib import Path
 from typing import Any
 
-import numpy as np
 import pandas as pd
 import pandas.testing as tm
 import pytest
@@ -33,34 +32,23 @@ def connect(data_directory: Path):
     return ibis.dask.connect(
         {
             "functional_alltypes": dd.from_pandas(
-                pd.read_csv(
-                    data_directory / "functional_alltypes.csv",
-                    index_col=None,
-                    dtype={
-                        "bool_col": bool,
-                        "string_col": str,
-                        "tinyint_col": np.int8,
-                        "smallint_col": np.int16,
-                        "int_col": np.int32,
-                        "bigint_col": np.int64,
-                        "float_col": np.float32,
-                        "double_col": np.float64,
-                    },
-                    parse_dates=["timestamp_col"],
-                    encoding="utf-8",
+                pd.read_parquet(
+                    data_directory / "parquet" / "functional_alltypes.parquet"
                 ),
                 npartitions=NPARTITIONS,
             ),
             "batting": dd.from_pandas(
-                pd.read_csv(data_directory / "batting.csv"),
+                pd.read_parquet(data_directory / "parquet" / "batting.parquet"),
                 npartitions=NPARTITIONS,
             ),
             "awards_players": dd.from_pandas(
-                pd.read_csv(data_directory / "awards_players.csv"),
+                pd.read_parquet(
+                    data_directory / "parquet" / "awards_players.parquet"
+                ),
                 npartitions=NPARTITIONS,
             ),
             'diamonds': dd.from_pandas(
-                pd.read_csv(str(data_directory / 'diamonds.csv')),
+                pd.read_parquet(data_directory / "parquet" / "diamonds.parquet"),
                 npartitions=NPARTITIONS,
             ),
             'json_t': dd.from_pandas(
diff --git a/ibis/backends/dask/tests/execution/conftest.py b/ibis/backends/dask/tests/execution/conftest.py
index a1d6e1e39506..0d4f860d866f 100644
--- a/ibis/backends/dask/tests/execution/conftest.py
+++ b/ibis/backends/dask/tests/execution/conftest.py
@@ -63,20 +63,13 @@ def df(npartitions):
 
 @pytest.fixture(scope='module')
 def batting_df(data_directory):
-    df = dd.read_csv(
-        data_directory / 'batting.csv',
-        assume_missing=True,
-        dtype={"lgID": "object"},
-    )
+    df = dd.read_parquet(data_directory / 'parquet' / 'batting.parquet')
     return df.sample(frac=0.01).reset_index(drop=True)
 
 
 @pytest.fixture(scope='module')
 def awards_players_df(data_directory):
-    return dd.read_csv(
-        data_directory / 'awards_players.csv',
-        assume_missing=True,
-    )
+    return dd.read_parquet(data_directory / 'parquet' / 'awards_players.parquet')
 
 
 @pytest.fixture(scope='module')
diff --git a/ibis/backends/datafusion/tests/conftest.py b/ibis/backends/datafusion/tests/conftest.py
index c1ef4100ae36..26df4bba11b0 100644
--- a/ibis/backends/datafusion/tests/conftest.py
+++ b/ibis/backends/datafusion/tests/conftest.py
@@ -28,12 +28,10 @@ def connect(data_directory: Path):
         # csv file path
         client = ibis.datafusion.connect({})
         client.register(
-            data_directory / 'functional_alltypes.csv',
+            data_directory / "csv" / 'functional_alltypes.csv',
            table_name='functional_alltypes',
             schema=pa.schema(
                 [
-                    ('index', 'int64'),
-                    ('Unnamed 0', 'int64'),
                     ('id', 'int64'),
                     ('bool_col', 'int8'),
                     ('tinyint_col', 'int8'),
@@ -50,11 +48,16 @@ def connect(data_directory: Path):
                 ]
             ),
         )
-        client.register(data_directory / 'batting.csv', table_name='batting')
         client.register(
-            data_directory / 'awards_players.csv', table_name='awards_players'
+            data_directory / "parquet" / 'batting.parquet', table_name='batting'
+        )
+        client.register(
+            data_directory / "parquet" / 'awards_players.parquet',
+            table_name='awards_players',
+        )
+        client.register(
+            data_directory / "parquet" / 'diamonds.parquet', table_name='diamonds'
         )
-        client.register(data_directory / 'diamonds.csv', table_name='diamonds')
         return client
 
     @property
diff --git a/ibis/backends/druid/tests/conftest.py b/ibis/backends/druid/tests/conftest.py
index a9a57a291821..81b8fc62900d 100644
--- a/ibis/backends/druid/tests/conftest.py
+++ b/ibis/backends/druid/tests/conftest.py
@@ -99,11 +99,10 @@ class TestConf(ServiceBackendTest, RoundHalfToEven):
 
     @classmethod
     def service_spec(cls, data_dir: Path):
-        files = [data_dir.joinpath("functional_alltypes.parquet")]
-        files.extend(
-            data_dir.joinpath("parquet", name, f"{name}.parquet")
-            for name in ("diamonds", "batting", "awards_players")
-        )
+        files = [
+            data_dir.joinpath("parquet", f"{name}.parquet")
+            for name in ("diamonds", "batting", "awards_players", "functional_alltypes")
+        ]
         return ServiceSpec(
             name="druid-coordinator", data_volume="/opt/shared", files=files
         )
diff --git a/ibis/backends/duckdb/tests/conftest.py b/ibis/backends/duckdb/tests/conftest.py
index c5acca0fbe3c..2533d21a44d6 100644
--- a/ibis/backends/duckdb/tests/conftest.py
+++ b/ibis/backends/duckdb/tests/conftest.py
@@ -19,23 +19,22 @@ class TestConf(BackendTest, RoundAwayFromZero):
     def __init__(self, data_directory: Path, **kwargs: Any) -> None:
         self.connection = self.connect(data_directory, **kwargs)
 
-        script_dir = data_directory.parent
-
-        schema = (script_dir / 'schema' / 'duckdb.sql').read_text()
-
         if not SANDBOXED:
             self.connection._load_extensions(
                 ["httpfs", "postgres_scanner", "sqlite_scanner"]
             )
 
+        script_dir = data_directory.parent
+        schema = script_dir.joinpath("schema", "duckdb.sql").read_text()
+
         with self.connection.begin() as con:
-            for stmt in filter(None, map(str.strip, schema.split(';'))):
+            for stmt in filter(None, map(str.strip, schema.split(";"))):
                 con.exec_driver_sql(stmt)
 
             for table in TEST_TABLES:
-                src = data_directory / f'{table}.csv'
+                src = data_directory / "csv" / f"{table}.csv"
                 con.exec_driver_sql(
-                    f"COPY {table} FROM {str(src)!r} (DELIMITER ',', HEADER, SAMPLE_SIZE 1)"
+                    f"COPY {table} FROM {str(src)!r} (DELIMITER ',', HEADER)"
                 )
 
     @staticmethod
diff --git a/ibis/backends/duckdb/tests/test_register.py b/ibis/backends/duckdb/tests/test_register.py
index b1719bb9d752..442af280e248 100644
--- a/ibis/backends/duckdb/tests/test_register.py
+++ b/ibis/backends/duckdb/tests/test_register.py
@@ -17,12 +17,12 @@
 
 
 def test_read_csv(data_directory):
-    t = ibis.read_csv(data_directory / "functional_alltypes.csv")
+    t = ibis.read_csv(data_directory / "csv" / "functional_alltypes.csv")
 
     assert t.count().execute()
 
 
 def test_read_parquet(data_directory):
-    t = ibis.read_parquet(data_directory / "functional_alltypes.parquet")
+    t = ibis.read_parquet(data_directory / "parquet" / "functional_alltypes.parquet")
 
     assert t.count().execute()
 
@@ -30,7 +30,7 @@ def test_read_parquet(data_directory):
     duckdb=["duckdb<0.7.0"], reason="read_json_auto doesn't exist", raises=exc.IbisError
 )
 def test_read_json(data_directory, tmp_path):
-    pqt = ibis.read_parquet(data_directory / "functional_alltypes.parquet")
+    pqt = ibis.read_parquet(data_directory / "parquet" / "functional_alltypes.parquet")
 
     path = tmp_path.joinpath("ft.json")
     path.write_text(pqt.execute().to_json(orient="records", lines=True))
@@ -142,15 +142,22 @@ def test_register_sqlite(con, tmp_path):
     reason="nix on linux cannot download duckdb extensions or data due to sandboxing",
     raises=duckdb.IOException,
 )
-def test_attach_sqlite(data_directory):
+def test_attach_sqlite(data_directory, tmp_path):
+    import sqlite3
+
+    test_db_path = tmp_path / "test.db"
+    with sqlite3.connect(test_db_path) as scon:
+        for line in (
+            Path(data_directory.parent / "schema" / "sqlite.sql").read_text().split(";")
+        ):
+            scon.execute(line)
+
     # Create a new connection here because we already have the `ibis_testing`
     # tables loaded in to the `con` fixture.
     con = ibis.duckdb.connect()
 
-    sqlite_db = data_directory / "ibis_testing.db"
-
-    con.attach_sqlite(sqlite_db)
-    assert set(con.list_tables()) == {
+    con.attach_sqlite(test_db_path)
+    assert set(con.list_tables()) >= {
         "functional_alltypes",
         "awards_players",
         "batting",
@@ -161,8 +168,8 @@ def test_attach_sqlite(data_directory):
     assert len(set(fa.schema().types)) > 1
 
     # overwrite existing sqlite_db and force schema to all strings
-    con.attach_sqlite(sqlite_db, overwrite=True, all_varchar=True)
-    assert set(con.list_tables()) == {
+    con.attach_sqlite(test_db_path, overwrite=True, all_varchar=True)
+    assert set(con.list_tables()) >= {
         "functional_alltypes",
         "awards_players",
         "batting",
diff --git a/ibis/backends/impala/tests/conftest.py b/ibis/backends/impala/tests/conftest.py
index 0f499c980aac..4febfd20e5f8 100644
--- a/ibis/backends/impala/tests/conftest.py
+++ b/ibis/backends/impala/tests/conftest.py
@@ -1,21 +1,21 @@
 from __future__ import annotations
 
 import ast
-import collections
 import concurrent.futures
 import contextlib
 import itertools
+import operator
 import os
 import subprocess
 from pathlib import Path
-from typing import Any, Iterator
+from typing import Any
 
 import pytest
+import toolz
 
 import ibis
 import ibis.expr.types as ir
 from ibis import options, util
-from ibis.backends.base import BaseBackend
 from ibis.backends.conftest import TEST_TABLES
 from ibis.backends.impala.compiler import ImpalaCompiler, ImpalaExprTranslator
 from ibis.backends.tests.base import BackendTest, RoundAwayFromZero, UnorderedComparator
@@ -46,18 +46,7 @@ def _load_data(data_dir: Path, script_dir: Path, **_: Any) -> None:
         fsspec = pytest.importorskip("fsspec")
 
         fs = fsspec.filesystem("file")
-        data_files = {
-            data_file
-            for data_file in fs.find(data_dir)
-            # ignore sqlite databases and markdown files
-            if not data_file.endswith((".db", ".md"))
-            # ignore files in the test data .git directory
-            if (
-                # ignore .git
-                os.path.relpath(data_file, data_dir).split(os.sep, 1)[0]
-                != ".git"
-            )
-        }
+        data_files = fs.find(data_dir / "impala")
 
         # without setting the pool size
         # connections are dropped from the urllib3
@@ -66,6 +55,7 @@ def _load_data(data_dir: Path, script_dir: Path, **_: Any) -> None:
         URLLIB_DEFAULT_POOL_SIZE = 10
 
         env = IbisTestEnv()
+        futures = []
        with contextlib.closing(
             ibis.impala.connect(
                 host=env.impala_host,
@@ -110,14 +100,42 @@ def _load_data(data_dir: Path, script_dir: Path, **_: Any) -> None:
             for future in concurrent.futures.as_completed(tasks):
                 future.result()
 
-            # create the tables and compute stats
-            for future in concurrent.futures.as_completed(
-                executor.submit(table_future.result().compute_stats)
-                for table_future in concurrent.futures.as_completed(
-                    impala_create_tables(con, env, executor=executor)
+            # create tables and compute stats
+            compute_stats = operator.methodcaller("compute_stats")
+            futures.append(
+                executor.submit(
+                    toolz.compose(compute_stats, con.avro_file),
+                    os.path.join(env.test_data_dir, 'impala', 'avro', 'tpch', 'region'),
+                    avro_schema={
+                        "type": "record",
+                        "name": "a",
+                        "fields": [
+                            {"name": "R_REGIONKEY", "type": ["null", "int"]},
+                            {"name": "R_NAME", "type": ["null", "string"]},
+                            {"name": "R_COMMENT", "type": ["null", "string"]},
+                        ],
+                    },
+                    name="tpch_region_avro",
+                    database=env.test_data_db,
+                    persist=True,
                 )
-            ):
-                future.result()
+            )
+
+            futures.extend(
+                executor.submit(
+                    toolz.compose(compute_stats, con.parquet_file),
+                    path,
+                    name=os.path.basename(path),
+                    database=env.test_data_db,
+                    persist=True,
+                    schema=TEST_TABLES.get(os.path.basename(path)),
+                )
+                for path in con.hdfs.ls(
+                    os.path.join(env.test_data_dir, 'impala', 'parquet')
+                )
+            )
+            for fut in concurrent.futures.as_completed(futures):
+                fut.result()
 
     @staticmethod
     def connect(
@@ -427,17 +445,17 @@ def impala_create_test_database(con, env):
     con.create_table(
         'alltypes',
         schema=ibis.schema(
-            [
-                ('a', 'int8'),
-                ('b', 'int16'),
-                ('c', 'int32'),
-                ('d', 'int64'),
-                ('e', 'float'),
-                ('f', 'double'),
-                ('g', 'string'),
-                ('h', 'boolean'),
-                ('i', 'timestamp'),
-            ]
+            dict(
+                a='int8',
+                b='int16',
+                c='int32',
+                d='int64',
+                e='float',
+                f='double',
+                g='string',
+                h='boolean',
+                i='timestamp',
+            )
         ),
         database=env.test_data_db,
     )
@@ -447,67 +465,3 @@ def impala_create_test_database(con, env):
         database=env.test_data_db,
     )
     con.table("win", database=env.test_data_db).insert(win, overwrite=True)
-
-
-PARQUET_SCHEMAS = {
-    "functional_alltypes": ibis.schema(
-        {
-            name: dtype
-            for name, dtype in TEST_TABLES["functional_alltypes"].items()
-            if name not in {"index", "Unnamed: 0"}
-        }
-    ),
-    "tpch_region": ibis.schema(
-        [
-            ("r_regionkey", "int16"),
-            ("r_name", "string"),
-            ("r_comment", "string"),
-        ]
-    ),
-}
-
-PARQUET_SCHEMAS.update(
-    (table, schema)
-    for table, schema in TEST_TABLES.items()
-    if table != "functional_alltypes"
-)
-
-AVRO_SCHEMAS = {
-    "tpch_region_avro": {
-        "type": "record",
-        "name": "a",
-        "fields": [
-            {"name": "R_REGIONKEY", "type": ["null", "int"]},
-            {"name": "R_NAME", "type": ["null", "string"]},
-            {"name": "R_COMMENT", "type": ["null", "string"]},
-        ],
-    }
-}
-
-ALL_SCHEMAS = collections.ChainMap(PARQUET_SCHEMAS, AVRO_SCHEMAS)
-
-
-def impala_create_tables(
-    con: BaseBackend,
-    env: IbisTestEnv,
-    *,
-    executor: concurrent.futures.Executor,
-) -> Iterator[concurrent.futures.Future]:
-    test_data_dir = env.test_data_dir
-    avro_files = [
-        (con.avro_file, os.path.join(test_data_dir, 'avro', path))
-        for path in con.hdfs.ls(os.path.join(test_data_dir, 'avro'))
-    ]
-    parquet_files = [
-        (con.parquet_file, os.path.join(test_data_dir, 'parquet', path))
-        for path in con.hdfs.ls(os.path.join(test_data_dir, 'parquet'))
-    ]
-    for method, path in itertools.chain(parquet_files, avro_files):
-        yield executor.submit(
-            method,
-            path,
-            ALL_SCHEMAS.get(os.path.basename(path)),
-            name=os.path.basename(path),
-            database=env.test_data_db,
-            persist=True,
-        )
diff --git a/ibis/backends/impala/tests/test_client.py b/ibis/backends/impala/tests/test_client.py
index 1603903d2313..04d85843f305 100644
--- a/ibis/backends/impala/tests/test_client.py
+++ b/ibis/backends/impala/tests/test_client.py
@@ -43,9 +43,9 @@ def test_get_table_ref(db):
 
 
 def test_run_sql(con, test_data_db):
-    table = con.sql(f"SELECT li.* FROM {test_data_db}.tpch_lineitem li")
+    table = con.sql(f"SELECT li.* FROM {test_data_db}.lineitem li")
 
-    li = con.table('tpch_lineitem')
+    li = con.table('lineitem')
     assert isinstance(table, ir.Table)
     assert_equal(table.schema(), li.schema())
 
@@ -76,8 +76,8 @@ def test_explain(con):
 
 
 def test_get_schema(con, test_data_db):
-    t = con.table('tpch_lineitem')
-    schema = con.get_schema('tpch_lineitem', database=test_data_db)
+    t = con.table('lineitem')
+    schema = con.get_schema('lineitem', database=test_data_db)
 
     assert_equal(t.schema(), schema)
 
@@ -112,7 +112,7 @@ def test_adapt_scalar_array_results(con, alltypes):
 
 
 def test_interactive_repr_call_failure(con):
-    t = con.table('tpch_lineitem').limit(100000)
+    t = con.table('lineitem').limit(100000)
 
     t = t[t, t.l_receiptdate.cast('timestamp').name('date')]
 
@@ -155,17 +155,17 @@ def test_verbose_log_queries(con, test_data_db):
     with config.option_context('verbose', True):
         with config.option_context('verbose_log', queries.append):
-            con.table('tpch_orders', database=test_data_db)
+            con.table('orders', database=test_data_db)
 
     # we can't make assertions about the length of queries, since the Python GC
     # could've collected a temporary pandas table any time between construction
     # of `queries` and the assertion
-    expected = f'DESCRIBE {test_data_db}.`tpch_orders`'
+    expected = f'DESCRIBE {test_data_db}.`orders`'
     assert expected in queries
 
 
 def test_sql_query_limits(con, test_data_db):
-    table = con.table('tpch_nation', database=test_data_db)
+    table = con.table('nation', database=test_data_db)
     with config.option_context('sql.default_limit', 100000):
         # table has 25 rows
         assert len(table.execute()) == 25
@@ -206,7 +206,7 @@ def test_database_default_current_database(con):
 
 
 def test_close_drops_temp_tables(con, test_data_dir):
-    hdfs_path = pjoin(test_data_dir, 'parquet/tpch_region')
+    hdfs_path = pjoin(test_data_dir, 'impala/parquet/region')
 
     table = con.parquet_file(hdfs_path)
diff --git a/ibis/backends/impala/tests/test_ddl.py b/ibis/backends/impala/tests/test_ddl.py
index 1127126054c5..67f2cc4f9abd 100644
--- a/ibis/backends/impala/tests/test_ddl.py
+++ b/ibis/backends/impala/tests/test_ddl.py
@@ -208,7 +208,7 @@ def test_rename_table(con, temp_database):
     tmp_db = temp_database
 
     orig_name = 'tmp_rename_test'
-    con.create_table(orig_name, con.table('tpch_region'))
+    con.create_table(orig_name, con.table('region'))
     table = con.table(orig_name)
 
     old_name = table.name
@@ -277,7 +277,7 @@ def test_change_format(con, table):
 
 
 def test_query_avro(con, test_data_dir, tmp_db):
-    hdfs_path = pjoin(test_data_dir, 'avro/tpch_region_avro')
+    hdfs_path = pjoin(test_data_dir, 'impala/avro/tpch/region')
 
     avro_schema = {
         "fields": [
@@ -372,7 +372,7 @@ def limit(con, hdfs_path, offset):
         return t.order_by(t.r_regionkey).limit(1, offset=offset).execute()
 
     nthreads = multiprocessing.cpu_count()
-    hdfs_path = pjoin(test_data_dir, 'parquet/tpch_region')
+    hdfs_path = pjoin(test_data_dir, 'impala/parquet/region')
 
     num_rows = int(con.parquet_file(hdfs_path).count().execute())
     with concurrent.futures.ThreadPoolExecutor(max_workers=nthreads) as e:
diff --git a/ibis/backends/impala/tests/test_exprs.py b/ibis/backends/impala/tests/test_exprs.py
index 8499c32ddbde..f7841cbfa455 100644
--- a/ibis/backends/impala/tests/test_exprs.py
+++ b/ibis/backends/impala/tests/test_exprs.py
@@ -3,6 +3,7 @@
 import pandas as pd
 import pandas.testing as tm
 import pytest
+from pytest import param
 
 import ibis
 import ibis.expr.types as ir
@@ -19,7 +20,7 @@ def test_embedded_identifier_quoting(alltypes):
 
 
 def test_decimal_metadata(con):
-    table = con.table('tpch_lineitem')
+    table = con.table('lineitem')
 
     expr = table.l_quantity
     assert expr.type().precision == 12
@@ -171,15 +172,20 @@ def test_int_builtins(con, expr, expected):
     assert result == expected, ImpalaCompiler.to_sql(expr)
 
 
-def test_column_types(alltypes):
-    df = alltypes.execute()
-    assert df.tinyint_col.dtype.name == 'int8'
-    assert df.smallint_col.dtype.name == 'int16'
-    assert df.int_col.dtype.name == 'int32'
-    assert df.bigint_col.dtype.name == 'int64'
-    assert df.float_col.dtype.name == 'float32'
-    assert df.double_col.dtype.name == 'float64'
-    assert df.timestamp_col.dtype.name == 'datetime64[ns]'
+@pytest.mark.parametrize(
+    ("col", "expected"),
+    [
+        param("tinyint_col", "int8", id="tinyint"),
+        param("smallint_col", "int16", id="smallint"),
+        param("int_col", "int32", id="int"),
+        param("bigint_col", "int64", id="bigint"),
+        param("float_col", "float32", id="float"),
+        param("double_col", "float64", id="double"),
+        param("timestamp_col", "datetime64[ns]", id="timestamp"),
+    ],
+)
+def test_column_types(alltypes_df, col, expected):
+    assert alltypes_df[col].dtype.name == expected
 
 
 @pytest.mark.parametrize(
@@ -337,7 +343,7 @@ def test_div_floordiv(con, expr, expected):
 
 
 def test_filter_predicates(con):
-    t = con.table('tpch_nation')
+    t = con.table('nation')
 
     predicates = [
         lambda x: x.n_name.lower().like('%ge%'),
@@ -366,7 +372,7 @@ def test_casted_expr_impala_bug(alltypes):
 
 
 def test_decimal_timestamp_builtins(con):
-    table = con.table('tpch_lineitem')
+    table = con.table('lineitem')
 
     dc = table.l_quantity
     ts = table.l_receiptdate.cast('timestamp')
@@ -520,10 +526,10 @@ def test_anti_join_self_reference_works(con, alltypes):
 
 
 def test_tpch_self_join_failure(con):
-    region = con.table('tpch_region')
-    nation = con.table('tpch_nation')
-    customer = con.table('tpch_customer')
-    orders = con.table('tpch_orders')
+    region = con.table('region')
+    nation = con.table('nation')
+    customer = con.table('customer')
+    orders = con.table('orders')
 
     fields_of_interest = [
         region.r_name.name('region'),
@@ -557,10 +563,10 @@ def test_tpch_self_join_failure(con):
 
 def test_tpch_correlated_subquery_failure(con):
     # #183 and other issues
-    region = con.table('tpch_region')
-    nation = con.table('tpch_nation')
-    customer = con.table('tpch_customer')
-    orders = con.table('tpch_orders')
+    region = con.table('region')
+    nation = con.table('nation')
+    customer = con.table('customer')
+    orders = con.table('orders')
 
     fields_of_interest = [
         customer,
diff --git a/ibis/backends/impala/tests/test_parquet_ddl.py b/ibis/backends/impala/tests/test_parquet_ddl.py
index 3e81093a6737..83a7496231b2 100644
--- a/ibis/backends/impala/tests/test_parquet_ddl.py
+++ b/ibis/backends/impala/tests/test_parquet_ddl.py
@@ -12,7 +12,7 @@
 
 
 def test_cleanup_tmp_table_on_gc(con, test_data_dir):
-    hdfs_path = pjoin(test_data_dir, 'parquet/tpch_region')
+    hdfs_path = pjoin(test_data_dir, 'impala/parquet/region')
     table = con.parquet_file(hdfs_path)
     name = table.op().name
     table = None
@@ -21,7 +21,7 @@ def test_cleanup_tmp_table_on_gc(con, test_data_dir):
 
 
 def test_persist_parquet_file_with_name(con, test_data_dir, temp_table_db):
-    hdfs_path = pjoin(test_data_dir, 'parquet/tpch_region')
+    hdfs_path = pjoin(test_data_dir, 'impala/parquet/region')
 
     tmp_db, name = temp_table_db
     schema = ibis.schema(
@@ -39,7 +39,7 @@ def test_persist_parquet_file_with_name(con, test_data_dir, temp_table_db):
 
 
 def test_query_parquet_file_with_schema(con, test_data_dir):
-    hdfs_path = pjoin(test_data_dir, 'parquet/tpch_region')
+    hdfs_path = pjoin(test_data_dir, 'impala/parquet/region')
 
     schema = ibis.schema(
         [
@@ -63,23 +63,23 @@ def test_query_parquet_file_with_schema(con, test_data_dir):
 
 
 def test_query_parquet_file_like_table(con, test_data_dir):
-    hdfs_path = pjoin(test_data_dir, 'parquet/tpch_region')
+    hdfs_path = pjoin(test_data_dir, 'impala/parquet/region')
 
     ex_schema = ibis.schema(
         [
-            ('r_regionkey', 'int16'),
+            ('r_regionkey', 'int32'),
             ('r_name', 'string'),
             ('r_comment', 'string'),
         ]
     )
 
-    table = con.parquet_file(hdfs_path, like_table='tpch_region')
+    table = con.parquet_file(hdfs_path, like_table='region')
 
     assert_equal(table.schema(), ex_schema)
 
 
 def test_query_parquet_infer_schema(con, test_data_dir):
-    hdfs_path = pjoin(test_data_dir, 'parquet/tpch_region')
+    hdfs_path = pjoin(test_data_dir, 'impala/parquet/region')
     table = con.parquet_file(hdfs_path)
 
     # NOTE: the actual schema should have an int16, but bc this is being
@@ -99,7 +99,7 @@ def test_query_parquet_infer_schema(con, test_data_dir):
 
 def test_create_table_persist_fails_if_called_twice(con, temp_table_db, test_data_dir):
     tmp_db, tname = temp_table_db
-    hdfs_path = pjoin(test_data_dir, 'parquet/tpch_region')
+    hdfs_path = pjoin(test_data_dir, 'impala/parquet/region')
 
     con.parquet_file(hdfs_path, name=tname, persist=True, database=tmp_db)
 
     with pytest.raises(HS2Error):
diff --git a/ibis/backends/impala/tests/test_udf.py b/ibis/backends/impala/tests/test_udf.py
index 8a394d09e8fa..3cc52ac78e0e 100644
--- a/ibis/backends/impala/tests/test_udf.py
+++ b/ibis/backends/impala/tests/test_udf.py
@@ -70,7 +70,7 @@ def t(table):
 
 @pytest.fixture
 def tpch_customer(con):
-    return con.table("tpch_customer")
+    return con.table("customer")
 
 
 @pytest.fixture
@@ -320,7 +320,7 @@ def test_identity_primitive_types(
     reason='Unknown reason. xfailing to restore the CI for udf tests. #2358'
 )
 def test_decimal_fail(udfcon, test_data_db, udf_ll):
-    col = udfcon.table('tpch_customer').c_acctbal
+    col = udfcon.table('customer').c_acctbal
     literal = ibis.literal(1).cast('decimal(12,2)')
     name = '__tmp_udf_' + util.guid()
diff --git a/ibis/backends/mssql/tests/conftest.py b/ibis/backends/mssql/tests/conftest.py
index 0e5dc197ae9f..3341c753b291 100644
--- a/ibis/backends/mssql/tests/conftest.py
+++ b/ibis/backends/mssql/tests/conftest.py
@@ -35,7 +35,7 @@ def service_spec(cls, data_dir: Path):
             name=cls.name(),
             data_volume="/data",
             files=[
-                data_dir.joinpath(f"{name}.csv")
+                data_dir.joinpath("csv", f"{name}.csv")
                 for name in (
                     "diamonds",
                     "batting",
diff --git a/ibis/backends/mysql/__init__.py b/ibis/backends/mysql/__init__.py
index d90c3efbd9fa..473765970efa 100644
--- a/ibis/backends/mysql/__init__.py
+++ b/ibis/backends/mysql/__init__.py
@@ -71,8 +71,6 @@ def do_connect(
         MySQLTable[table]
           name: functional_alltypes
           schema:
-            index : int64
-            Unnamed: 0 : int64
             id : int32
             bool_col : int8
             tinyint_col : int8
diff --git a/ibis/backends/mysql/tests/conftest.py b/ibis/backends/mysql/tests/conftest.py
index 14e691e48cc1..cfa44625f195 100644
--- a/ibis/backends/mysql/tests/conftest.py
+++ b/ibis/backends/mysql/tests/conftest.py
@@ -84,7 +84,7 @@ def _load_data(
         )
         with engine.begin() as con:
             for table in TEST_TABLES:
-                csv_path = data_dir / f"{table}.csv"
+                csv_path = data_dir / "csv" / f"{table}.csv"
                 lines = [
                     f"LOAD DATA LOCAL INFILE {str(csv_path)!r}",
                     f"INTO TABLE {table}",
diff --git a/ibis/backends/pandas/tests/conftest.py b/ibis/backends/pandas/tests/conftest.py
index 85a1ad061b37..3f615e915d8f 100644
--- a/ibis/backends/pandas/tests/conftest.py
+++ b/ibis/backends/pandas/tests/conftest.py
@@ -2,12 +2,11 @@
 
 from pathlib import Path
 
-import numpy as np
 import pandas as pd
 
 import ibis
 import ibis.expr.operations as ops
-import ibis.expr.types as ir
+from ibis.backends.conftest import TEST_TABLES
 from ibis.backends.tests.base import BackendTest, RoundHalfToEven
 from ibis.backends.tests.data import array_types, json_types, struct_types, win
 
@@ -23,40 +22,15 @@ class TestConf(BackendTest, RoundHalfToEven):
     def connect(data_directory: Path):
         return ibis.pandas.connect(
             dictionary={
-                "functional_alltypes": pd.read_csv(
-                    data_directory / "functional_alltypes.csv",
-                    index_col=None,
-                    dtype={
-                        "bool_col": bool,
-                        "string_col": str,
-                        "tinyint_col": np.int8,
-                        "smallint_col": np.int16,
-                        "int_col": np.int32,
-                        "bigint_col": np.int64,
-                        "float_col": np.float32,
-                        "double_col": np.float64,
-                    },
-                    parse_dates=["timestamp_col"],
-                    encoding="utf-8",
-                ),
-                "batting": pd.read_csv(data_directory / "batting.csv"),
-                "awards_players": pd.read_csv(data_directory / "awards_players.csv"),
-                'diamonds': pd.read_csv(str(data_directory / 'diamonds.csv')),
+                **{
+                    table: pd.read_parquet(
+                        data_directory / "parquet" / f"{table}.parquet"
+                    )
+                    for table in TEST_TABLES.keys()
+                },
                 'struct': struct_types,
                 'json_t': json_types,
                 'array_types': array_types,
                 'win': win,
             }
         )
-
-    @property
-    def functional_alltypes(self) -> ir.Table:
-        return self.connection.table("functional_alltypes")
-
-    @property
-    def batting(self) -> ir.Table:
-        return self.connection.table("batting")
-
-    @property
-    def awards_players(self) -> ir.Table:
-        return self.connection.table("awards_players")
diff --git a/ibis/backends/pandas/tests/execution/conftest.py b/ibis/backends/pandas/tests/execution/conftest.py
index b03bf4f0e343..dd08aa6e7a91 100644
--- a/ibis/backends/pandas/tests/execution/conftest.py
+++ b/ibis/backends/pandas/tests/execution/conftest.py
@@ -76,24 +76,15 @@ def df():
 def batting_df(data_directory):
     num_rows = 1000
     start_index = 30
-    df = pd.read_csv(
-        data_directory / 'batting.csv',
-        index_col=None,
-        sep=',',
-        header=0,
-        skiprows=range(1, start_index + 1),
-        nrows=num_rows,
-    )
+    df = pd.read_parquet(data_directory / 'parquet' / 'batting.parquet').iloc[
+        start_index : start_index + num_rows
+    ]
     return df.reset_index(drop=True)
 
 
 @pytest.fixture(scope='module')
 def awards_players_df(data_directory):
-    return pd.read_csv(
-        data_directory / 'awards_players.csv',
-        index_col=None,
-        sep=',',
-    )
+    return pd.read_parquet(data_directory / 'parquet' / 'awards_players.parquet')
 
 
 @pytest.fixture(scope='module')
diff --git a/ibis/backends/polars/tests/conftest.py b/ibis/backends/polars/tests/conftest.py
index e0fffc5a3eb7..34263fef1659 100644
--- a/ibis/backends/polars/tests/conftest.py
+++ b/ibis/backends/polars/tests/conftest.py
@@ -5,7 +5,6 @@
 import pytest
 
 import ibis
-import ibis.expr.types as ir
 from ibis.backends.tests.base import BackendTest, RoundAwayFromZero
 from ibis.backends.tests.data import array_types, struct_types, win
 
@@ -21,46 +20,25 @@ class TestConf(BackendTest, RoundAwayFromZero):
     def connect(data_directory: Path):
         client = ibis.polars.connect({})
         client.register(
-            data_directory / 'functional_alltypes.csv',
+            data_directory / 'parquet' / 'functional_alltypes.parquet',
             table_name='functional_alltypes',
-            dtypes={
-                'index': pl.Int64,
-                'Unnamed 0': pl.Int64,
-                'id': pl.Int64,
-                'bool_col': pl.Int64,
-                'tinyint_col': pl.Int64,
-                'smallint_col': pl.Int64,
-                'int_col': pl.Int32,
-                'bigint_col': pl.Int64,
-                'float_col': pl.Float32,
-                'double_col': pl.Float64,
-                'date_string_col': pl.Utf8,
-                'string_col': pl.Utf8,
-                'timestamp_col': pl.Datetime,
-                'year': pl.Int64,
-                'month': pl.Int64,
-            },
         )
-        client.register(data_directory / 'batting.csv', table_name='batting')
         client.register(
-            data_directory / 'awards_players.csv', table_name='awards_players'
+            data_directory / "parquet" / 'batting.parquet', table_name='batting'
+        )
+        client.register(
+            data_directory / "parquet" / 'awards_players.parquet',
+            table_name='awards_players',
+        )
+        client.register(
+            data_directory / "parquet" / 'diamonds.parquet', table_name='diamonds'
         )
-        client.register(data_directory / 'diamonds.csv', table_name='diamonds')
         client.register(array_types, table_name='array_types')
         client.register(struct_types, table_name='struct')
         client.register(win, table_name="win")
         return client
 
-    @property
-    def functional_alltypes(self) -> ir.Table:
-        table = self.connection.table('functional_alltypes')
-        return table.mutate(
-            bool_col=table.bool_col.cast('bool'),
-            tinyint_col=table.tinyint_col.cast('int8'),
-            smallint_col=table.smallint_col.cast('int16'),
-        )
-
 
 @pytest.fixture(scope='session')
 def client(data_directory):
diff --git a/ibis/backends/polars/tests/test_udf.py b/ibis/backends/polars/tests/test_udf.py
index 078b15d3988b..f6a26e22ae92 100644
--- a/ibis/backends/polars/tests/test_udf.py
+++ b/ibis/backends/polars/tests/test_udf.py
@@ -43,6 +43,6 @@ def test_multiple_argument_udf(alltypes):
     result = expr.execute()
 
     df = alltypes[['smallint_col', 'int_col']].execute()
-    expected = (df.smallint_col + df.int_col).astype('int64')
+    expected = (df.smallint_col + df.int_col).astype('int32')
 
     tm.assert_series_equal(result, expected.rename('tmp'))
diff --git a/ibis/backends/postgres/__init__.py b/ibis/backends/postgres/__init__.py
index a85f7a5ee489..62ddd6073b1d 100644
--- a/ibis/backends/postgres/__init__.py
+++ b/ibis/backends/postgres/__init__.py
@@ -108,8 +108,6 @@ def do_connect(
         PostgreSQLTable[table]
           name: functional_alltypes
          schema:
-            index : int64
-            Unnamed: 0 : int64
             id : int32
             bool_col : boolean
             tinyint_col : int16
diff --git a/ibis/backends/postgres/tests/conftest.py b/ibis/backends/postgres/tests/conftest.py
index 599740cf89b4..3589a334f545 100644
--- a/ibis/backends/postgres/tests/conftest.py
+++ b/ibis/backends/postgres/tests/conftest.py
@@ -88,7 +88,7 @@ def _load_data(
                 # `data_iter` argument would have to be turned back into a CSV
                 # before being passed to `copy_expert`.
                 sql = f"COPY {table} FROM STDIN WITH (FORMAT CSV, HEADER TRUE, DELIMITER ',')"
-                with data_dir.joinpath(f'{table}.csv').open('r') as file:
+                with data_dir.joinpath("csv", f'{table}.csv').open('r') as file:
                     cur.copy_expert(sql=sql, file=file)
 
     @staticmethod
diff --git a/ibis/backends/postgres/tests/test_functions.py b/ibis/backends/postgres/tests/test_functions.py
index 018aeee16a2a..f0043bf4fc08 100644
--- a/ibis/backends/postgres/tests/test_functions.py
+++ b/ibis/backends/postgres/tests/test_functions.py
@@ -73,8 +73,6 @@ def test_date_cast(alltypes, at, translate):
 @pytest.mark.parametrize(
     'column',
     [
-        'index',
-        'Unnamed: 0',
         'id',
         'bool_col',
         'tinyint_col',
diff --git a/ibis/backends/pyspark/tests/conftest.py b/ibis/backends/pyspark/tests/conftest.py
index ba05fd1abf72..9e19f79980b6 100644
--- a/ibis/backends/pyspark/tests/conftest.py
+++ b/ibis/backends/pyspark/tests/conftest.py
@@ -10,7 +10,6 @@
 import ibis
 from ibis import util
 from ibis.backends.conftest import TEST_TABLES
-from ibis.backends.pyspark.datatypes import spark_dtype
 from ibis.backends.tests.base import BackendTest, RoundAwayFromZero
 from ibis.backends.tests.data import win
 
@@ -44,40 +43,21 @@ def get_common_spark_testing_client(data_directory, connect):
         .getOrCreate()
     )
     _spark_testing_client = connect(spark)
-    s = _spark_testing_client._session
+    s: SparkSession = _spark_testing_client._session
     num_partitions = 4
 
-    s.read.csv(
-        path=str(data_directory / 'functional_alltypes.csv'),
-        schema=spark_dtype(
-            ibis.schema(
-                {
-                    # cast below, Spark can't read 0/1 as bool
-                    name: {"bool_col": "int8"}.get(name, dtype)
-                    for name, dtype in TEST_TABLES["functional_alltypes"].items()
-                }
-            )
-        ),
-        mode='FAILFAST',
-        header=True,
-    ).repartition(num_partitions).sort('index').withColumn(
-        "bool_col", F.column("bool_col").cast("boolean")
-    ).createOrReplaceTempView(
-        'functional_alltypes'
-    )
+    sort_cols = {"functional_alltypes": "id"}
 
-    for name, schema in TEST_TABLES.items():
-        if name != "functional_alltypes":
-            s.read.csv(
-                path=str(data_directory / f'{name}.csv'),
-                schema=spark_dtype(schema),
-                header=True,
-            ).repartition(num_partitions).createOrReplaceTempView(name)
+    for name in TEST_TABLES.keys():
+        path = str(data_directory / "parquet" / f"{name}.parquet")
+        t = s.read.parquet(path).repartition(num_partitions)
+        if (sort_col := sort_cols.get(name)) is not None:
+            t = t.sort(sort_col)
+        t.createOrReplaceTempView(name)
 
-    df_simple = s.createDataFrame([(1, 'a')], ['foo', 'bar'])
-    df_simple.createOrReplaceTempView('simple')
+    s.createDataFrame([(1, 'a')], ['foo', 'bar']).createOrReplaceTempView('simple')
 
-    df_struct = s.createDataFrame(
+    s.createDataFrame(
         [
             Row(abc=Row(a=1.0, b='banana', c=2)),
             Row(abc=Row(a=2.0, b='apple', c=3)),
@@ -87,33 +67,17 @@ def get_common_spark_testing_client(data_directory, connect):
             Row(abc=None),
             Row(abc=Row(a=3.0, b='orange', c=None)),
         ],
-        schema=pt.StructType(
-            [
-                pt.StructField(
-                    "abc",
-                    pt.StructType(
-                        [
-                            pt.StructField("a", pt.DoubleType(), True),
-                            pt.StructField("b", pt.StringType(), True),
-                            pt.StructField("c", pt.IntegerType(), True),
-                        ]
-                    ),
-                )
-            ]
-        ),
-    )
-    df_struct.createOrReplaceTempView('struct')
+    ).createOrReplaceTempView('struct')
 
-    df_nested_types = s.createDataFrame(
+    s.createDataFrame(
         [([1, 2], [[3, 4], [5, 6]], {'a': [[2, 4], [3, 5]]})],
         [
             'list_of_ints',
             'list_of_list_of_ints',
             'map_string_list_of_list_of_ints',
         ],
-    )
-    df_nested_types.createOrReplaceTempView('nested_types')
-    df_array_types = s.createDataFrame(
+    ).createOrReplaceTempView('nested_types')
+    s.createDataFrame(
         [
             (
                 [1, 2, 3],
@@ -144,21 +108,18 @@ def get_common_spark_testing_client(data_directory, connect):
             ),
         ],
         ["x", "y", "z", "grouper", "scalar_column", "multi_dim"],
-    )
-    df_array_types.createOrReplaceTempView("array_types")
+    ).createOrReplaceTempView("array_types")
 
-    df_complicated = s.createDataFrame(
+    s.createDataFrame(
         [({(1, 3): [[2, 4], [3, 5]]},)], ['map_tuple_list_of_list_of_ints']
-    )
-    df_complicated.createOrReplaceTempView('complicated')
+    ).createOrReplaceTempView('complicated')
 
-    df_udf = s.createDataFrame(
+    s.createDataFrame(
         [('a', 1, 4.0, 'a'), ('b', 2, 5.0, 'a'), ('c', 3, 6.0, 'b')],
         ['a', 'b', 'c', 'key'],
-    )
-    df_udf.createOrReplaceTempView('udf')
+    ).createOrReplaceTempView('udf')
 
-    df_udf_nan = s.createDataFrame(
+    s.createDataFrame(
         pd.DataFrame(
             {
                 'a': np.arange(10, dtype=float),
@@ -166,27 +127,24 @@ def get_common_spark_testing_client(data_directory, connect):
                 'key': list('ddeefffggh'),
             }
         )
-    )
-    df_udf_nan.createOrReplaceTempView('udf_nan')
+    ).createOrReplaceTempView('udf_nan')
 
-    df_udf_null = s.createDataFrame(
+    s.createDataFrame(
         [(float(i), None if i % 2 else 3.0, 'ddeefffggh'[i]) for i in range(10)],
         ['a', 'b', 'key'],
-    )
-    df_udf_null.createOrReplaceTempView('udf_null')
+    ).createOrReplaceTempView('udf_null')
 
-    df_udf_random = s.createDataFrame(
+    s.createDataFrame(
         pd.DataFrame(
             {
-                'a': np.arange(4, dtype=float).tolist() + np.random.rand(3).tolist(),
-                'b': np.arange(4, dtype=float).tolist() + np.random.rand(3).tolist(),
+                'a': np.arange(4.0).tolist() + np.random.rand(3).tolist(),
+                'b': np.arange(4.0).tolist() + np.random.rand(3).tolist(),
                 'key': list('ddeefff'),
             }
         )
-    )
-    df_udf_random.createOrReplaceTempView('udf_random')
+    ).createOrReplaceTempView('udf_random')
 
-    df_json_t = s.createDataFrame(
+    s.createDataFrame(
         pd.DataFrame(
             {
                 "js": [
@@ -199,11 +157,9 @@ def get_common_spark_testing_client(data_directory, connect):
                 ]
             }
         )
-    )
-    df_json_t.createOrReplaceTempView("json_t")
+    ).createOrReplaceTempView("json_t")
 
-    win_t = s.createDataFrame(win)
-    win_t.createOrReplaceTempView("win")
+    s.createDataFrame(win).createOrReplaceTempView("win")
 
     return _spark_testing_client
 
@@ -379,12 +335,7 @@ def temp_database(con, test_data_db):
 
 @pytest.fixture(scope='session')
 def alltypes(con):
-    return con.table('functional_alltypes').relabel({'Unnamed: 0': 'Unnamed:0'})
-
-
-@pytest.fixture(scope='session')
-def tmp_dir():
-    return f'/tmp/__ibis_test_{util.guid()}'
+    return con.table('functional_alltypes')
 
 
 @pytest.fixture
diff --git a/ibis/backends/pyspark/tests/test_ddl.py b/ibis/backends/pyspark/tests/test_ddl.py
index 55c1fb8687e2..47e137fa942f 100644
--- a/ibis/backends/pyspark/tests/test_ddl.py
+++ b/ibis/backends/pyspark/tests/test_ddl.py
@@ -40,8 +40,8 @@ def test_drop_non_empty_database(con, alltypes, temp_table_db):
 
 
 @pytest.fixture
-def temp_base(tmp_dir):
-    base = pjoin(tmp_dir, util.gen_name("temp_base"))
+def temp_base():
+    base = pjoin(f"/tmp/{util.gen_name('pyspark_testing')}", util.gen_name("temp_base"))
     yield base
     shutil.rmtree(base, ignore_errors=True)
diff --git a/ibis/backends/snowflake/tests/conftest.py b/ibis/backends/snowflake/tests/conftest.py
index 51db0e5d2e92..47bbd37ca134 100644
--- a/ibis/backends/snowflake/tests/conftest.py
+++ b/ibis/backends/snowflake/tests/conftest.py
@@ -24,7 +24,7 @@ def copy_into(con, data_dir: Path, table: str) -> None:
     stage = "ibis_testing"
     csv = f"{table}.csv"
     con.exec_driver_sql(
-        f"PUT file://{data_dir.joinpath(csv).absolute()} @{stage}/{csv}"
+        f"PUT file://{data_dir.joinpath('csv', csv).absolute()} @{stage}/{csv}"
     )
     con.exec_driver_sql(
         f"COPY INTO {table} FROM @{stage}/{csv} FILE_FORMAT = (FORMAT_NAME = ibis_testing)"
     )
diff --git a/ibis/backends/sqlite/tests/conftest.py b/ibis/backends/sqlite/tests/conftest.py
index 823d042d7ed2..f3d1e84f8993 100644
--- a/ibis/backends/sqlite/tests/conftest.py
+++ b/ibis/backends/sqlite/tests/conftest.py
@@ -36,7 +36,7 @@ def __init__(self, data_directory: Path) -> None:
 
         for table in TEST_TABLES:
             basename = f"{table}.csv"
-            with data_directory.joinpath(basename).open("r") as f:
+            with data_directory.joinpath("csv", basename).open("r") as f:
                 reader = csv.reader(f)
                 header = next(reader)
                 assert header, f"empty header for table: `{table}`"
diff --git a/ibis/backends/tests/snapshots/test_string/test_rlike/duckdb/out.sql b/ibis/backends/tests/snapshots/test_string/test_rlike/duckdb/out.sql
index c2556795fa50..cc2d5a123a56 100644
--- a/ibis/backends/tests/snapshots/test_string/test_rlike/duckdb/out.sql
+++ b/ibis/backends/tests/snapshots/test_string/test_rlike/duckdb/out.sql
@@ -1,6 +1,4 @@
 SELECT
-  t0.index,
-  t0."Unnamed: 0",
   t0.id,
   t0.bool_col,
   t0.tinyint_col,
diff --git a/ibis/backends/tests/snapshots/test_string/test_rlike/mysql/out.sql b/ibis/backends/tests/snapshots/test_string/test_rlike/mysql/out.sql
index 168a132ce471..4fcba540524e 100644
--- a/ibis/backends/tests/snapshots/test_string/test_rlike/mysql/out.sql
+++ b/ibis/backends/tests/snapshots/test_string/test_rlike/mysql/out.sql
@@ -1,6 +1,4 @@
 SELECT
-  t0.`index`,
-  t0.`Unnamed: 0`,
   t0.id,
   t0.bool_col = 1 AS bool_col,
   t0.tinyint_col,
diff --git a/ibis/backends/tests/snapshots/test_string/test_rlike/postgres/out.sql b/ibis/backends/tests/snapshots/test_string/test_rlike/postgres/out.sql
index 0de6a36f93fb..783fd1ccf507 100644
--- a/ibis/backends/tests/snapshots/test_string/test_rlike/postgres/out.sql
+++ b/ibis/backends/tests/snapshots/test_string/test_rlike/postgres/out.sql
@@ -1,6 +1,4 @@
 SELECT
-  t0.index,
-  t0."Unnamed: 0",
   t0.id,
   t0.bool_col,
   t0.tinyint_col,
diff --git a/ibis/backends/tests/snapshots/test_string/test_rlike/sqlite/out.sql b/ibis/backends/tests/snapshots/test_string/test_rlike/sqlite/out.sql
index c3811fb27285..ce7cfc3c0494 100644
--- a/ibis/backends/tests/snapshots/test_string/test_rlike/sqlite/out.sql
+++ b/ibis/backends/tests/snapshots/test_string/test_rlike/sqlite/out.sql
@@ -1,6 +1,4 @@
 SELECT
-  t0."index",
-  t0."Unnamed: 0",
   t0.id,
   t0.bool_col,
   t0.tinyint_col,
diff --git a/ibis/backends/tests/snapshots/test_string/test_rlike/trino/out.sql b/ibis/backends/tests/snapshots/test_string/test_rlike/trino/out.sql
index e543c3cc1498..fe88c55e121b 100644
--- a/ibis/backends/tests/snapshots/test_string/test_rlike/trino/out.sql
+++ b/ibis/backends/tests/snapshots/test_string/test_rlike/trino/out.sql
@@ -1,6 +1,4 @@
 SELECT
-  t0.index,
-  t0."unnamed: 0",
   t0.id,
   t0.bool_col,
   t0.tinyint_col,
diff --git a/ibis/backends/tests/test_client.py b/ibis/backends/tests/test_client.py
index 4d303af38a29..f18d109b80db 100644
--- a/ibis/backends/tests/test_client.py
+++ b/ibis/backends/tests/test_client.py
@@ -104,10 +104,7 @@ def test_query_schema(ddl_backend, expr_fn, expected):
 
 @pytest.mark.notimpl(["datafusion", "snowflake", "polars", "mssql"])
 @pytest.mark.notyet(["sqlite"])
-@pytest.mark.never(
-    ["dask", "pandas"],
-    reason="dask and pandas do not support SQL",
-)
+@pytest.mark.never(["dask", "pandas"], reason="dask and pandas do not support SQL")
 def test_sql(backend, con):
     # execute the expression using SQL query
     table = backend.format_table("functional_alltypes")
@@ -127,13 +124,17 @@ def test_sql(backend, con):
 @mark.notimpl(["datafusion", "polars", "druid"])
 def test_create_table_from_schema(con, new_schema, temp_table):
     new_table = con.create_table(temp_table, schema=new_schema)
-    backend_mapping = backend_type_mapping.get(con.name, dict())
+    backend_mapping = backend_type_mapping.get(con.name, {})
 
-    for column_name, column_type in new_table.schema().items():
-        assert (
-            backend_mapping.get(new_schema[column_name], new_schema[column_name])
-            == column_type
-        )
+    result = ibis.schema(
+        {
+            column_name: backend_mapping.get(
+                new_schema[column_name], new_schema[column_name]
+            )
+            for column_name in new_table.schema().keys()
+        }
+    )
+    assert result == new_table.schema()
 
 
 @pytest.fixture(scope="session")
diff --git a/ibis/backends/tests/test_register.py b/ibis/backends/tests/test_register.py
index d849cd32e6e2..95cfc48de913 100644
--- a/ibis/backends/tests/test_register.py
+++ b/ibis/backends/tests/test_register.py
@@ -32,7 +32,7 @@ def pushd(new_dir):
 def gzip_csv(data_directory, tmp_path):
     basename = "diamonds.csv"
     f = tmp_path.joinpath(f"{basename}.gz")
-    data = data_directory.joinpath(basename).read_bytes()
+    data = data_directory.joinpath("csv", basename).read_bytes()
     f.write_bytes(gzip.compress(data))
     return str(f.absolute())
 
@@ -93,7 +93,7 @@ def gzip_csv(data_directory, tmp_path):
     ]
 )
 def test_register_csv(con, data_directory, fname, in_table_name, out_table_name):
-    with pushd(data_directory):
+    with pushd(data_directory / "csv"):
         table = con.register(fname, table_name=in_table_name)
 
     assert any(out_table_name in t for t in con.list_tables())
@@ -143,7 +143,7 @@ def test_register_with_dotted_name(con, data_directory, tmp_path):
     basename = "foo.bar.baz/diamonds.csv"
     f = tmp_path.joinpath(basename)
     f.parent.mkdir()
-    data = data_directory.joinpath("diamonds.csv").read_bytes()
+    data = data_directory.joinpath("csv", "diamonds.csv").read_bytes()
     f.write_bytes(data)
 
     table = con.register(str(f.absolute()))
@@ -200,7 +200,7 @@ def test_register_parquet(
     pq = pytest.importorskip("pyarrow.parquet")
 
     fname = Path(fname)
-    table = read_table(data_directory / fname.name)
+    table = read_table(data_directory / "csv" / fname.name)
 
     pq.write_table(table, tmp_path / fname.name)
 
@@ -238,7 +238,7 @@ def test_register_iterator_parquet(
 ):
     pq = pytest.importorskip("pyarrow.parquet")
 
-    table = read_table(data_directory / "functional_alltypes.csv")
+    table = read_table(data_directory / "csv" / "functional_alltypes.csv")
 
     pq.write_table(table, tmp_path / "functional_alltypes.parquet")
 
@@ -424,7 +424,8 @@ def test_read_parquet(
     pq = pytest.importorskip("pyarrow.parquet")
 
     fname = Path(fname)
-    table = read_table(data_directory / fname.name)
+    fname = Path(data_directory) / "parquet" / fname.name
+    table = pq.read_table(fname)
 
     pq.write_table(table, tmp_path / fname.name)
 
@@ -468,7 +469,7 @@ def test_read_parquet(
     ]
 )
 def test_read_csv(con, data_directory, fname, in_table_name, out_table_name):
-    with pushd(data_directory):
+    with pushd(data_directory / "csv"):
         if con.name == "pyspark":
             # pyspark doesn't respect CWD
             fname = str(Path(fname).absolute())
diff --git a/ibis/backends/tests/test_vectorized_udf.py b/ibis/backends/tests/test_vectorized_udf.py
index 03a3d0f803f0..22bc79b1d442 100644
--- a/ibis/backends/tests/test_vectorized_udf.py
+++ b/ibis/backends/tests/test_vectorized_udf.py
@@ -505,7 +505,7 @@ def add_one_struct_exact_once(v):
         path.touch()
         return v + 1, v + 2
 
-    struct = add_one_struct_exact_once(udf_alltypes['index'])
+    struct = add_one_struct_exact_once(udf_alltypes['id'])
 
     if method == "destructure":
         expr = udf_alltypes.mutate(struct.destructure())
diff --git a/ibis/backends/tests/test_window.py b/ibis/backends/tests/test_window.py
index cffd43111b1c..15da4e89d044 100644
--- a/ibis/backends/tests/test_window.py
+++ b/ibis/backends/tests/test_window.py
@@ -910,7 +910,7 @@ def agg(df):
         return df
 
     expected = (
-        df.groupby("month")
+        df.groupby("month", group_keys=False)
         .apply(agg)
         .sort_values(["id"])
         .reset_index(drop=True)
diff --git a/justfile b/justfile
index b476515fe978..00f27ebc45c3 100644
--- a/justfile
+++ b/justfile
@@ -64,7 +64,7 @@ doctest *args:
     pytest --doctest-modules {{ args }} "${doctest_modules[@]}"
 
 # download testing data
-download-data owner="ibis-project" repo="testing-data" rev="master":
+download-data owner="cpcloud" repo="testing-data" rev="cleanup":
     #!/usr/bin/env bash
    outdir="{{ justfile_directory() }}/ci/ibis-testing-data"
    rm -rf "$outdir"
@@ -74,9 +74,14 @@ download-data owner="ibis-project" repo="testing-data" rev="master":
    if [ "{{ rev }}" = "master" ]; then
        args+=("--depth" "1")
    fi
+    args+=("$outdir")
 
    git clone "${args[@]}"
 
+    if [ "{{ rev }}" != "master" ]; then
+        git -C "${outdir}" checkout "{{ rev }}"
+    fi
+
 # start backends using docker compose; no arguments starts all backends
 up *backends:
     docker compose up --wait {{ backends }}
diff --git a/nix/overlay.nix b/nix/overlay.nix
index fb9a044d92f3..baeeec7083e2 100644
--- a/nix/overlay.nix
+++ b/nix/overlay.nix
@@ -18,14 +18,13 @@ let
 in
 {
   ibisTestingData = pkgs.fetchFromGitHub {
-    owner = "ibis-project";
+    name = "ibis-testing-data";
+    owner = "cpcloud";
     repo = "testing-data";
-    rev = "master";
-    sha256 = "sha256-NbgEe0w/qf9hCr9rRfIpyaH9pv25I8x0ykY7EJxDOuk=";
+    rev = "cleanup";
+    sha256 = "sha256-q1b5IcOl5oIFXP7/P5RufncjHEVrWp4NjoU2uo/BE9U=";
   };
 
-  rustNightly = pkgs.rust-bin.selectLatestNightlyWith (toolchain: toolchain.minimal);
-
   ibis38 = pkgs.callPackage ./ibis.nix { python3 = pkgs.python38; };
   ibis39 = pkgs.callPackage ./ibis.nix { python3 = pkgs.python39; };
   ibis310 = pkgs.callPackage ./ibis.nix { python3 = pkgs.python310; };