diff --git a/.github/workflows/ibis-backends-cloud.yml b/.github/workflows/ibis-backends-cloud.yml index 5654043ece5f..1085d97bdd75 100644 --- a/.github/workflows/ibis-backends-cloud.yml +++ b/.github/workflows/ibis-backends-cloud.yml @@ -102,6 +102,12 @@ jobs: - name: install poetry run: pip install 'poetry==1.8.3' + - name: remove databricks arrow and numpy upper bounds + if: matrix.backend.name != 'databricks' + run: | + poetry remove databricks-sql-connector + poetry update numpy pyarrow + - name: install additional deps if: matrix.backend.key == 'snowpark' run: poetry add snowflake-snowpark-python --python="==${{ steps.install_python.outputs.python-version }}" @@ -120,6 +126,19 @@ jobs: with: credentials_json: ${{ secrets.GCP_CREDENTIALS }} + - name: setup databricks credentials + if: matrix.backend.name == 'databricks' + run: | + { + echo "DATABRICKS_HTTP_PATH=${DATABRICKS_HTTP_PATH}" + echo "DATABRICKS_SERVER_HOSTNAME=${DATABRICKS_SERVER_HOSTNAME}" + echo "DATABRICKS_TOKEN=${DATABRICKS_TOKEN}" + } >> "$GITHUB_ENV" + env: + DATABRICKS_HTTP_PATH: ${{ secrets.DATABRICKS_HTTP_PATH }} + DATABRICKS_SERVER_HOSTNAME: ${{ secrets.DATABRICKS_SERVER_HOSTNAME }} + DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }} + - name: setup snowflake credentials if: matrix.backend.name == 'snowflake' run: | diff --git a/.github/workflows/ibis-backends.yml b/.github/workflows/ibis-backends.yml index 70ffa1ab4af4..089d579e5cc1 100644 --- a/.github/workflows/ibis-backends.yml +++ b/.github/workflows/ibis-backends.yml @@ -455,6 +455,11 @@ jobs: - name: install poetry run: pip install 'poetry==1.8.3' + - name: remove databricks arrow and numpy upper bounds + run: | + poetry remove databricks-sql-connector + poetry update numpy pyarrow + - name: install ibis run: poetry install --without dev --without docs --extras "${{ join(matrix.backend.extras, ' ') }} examples" @@ -499,8 +504,7 @@ jobs: - name: check that no untracked files were produced shell: bash - run: | - ! git status --porcelain | tee /dev/stderr | grep . + run: git checkout poetry.lock pyproject.toml && ! git status --porcelain | tee /dev/stderr | grep . 
- name: upload code coverage if: success() @@ -609,7 +613,7 @@ jobs: - name: remove incompatible deps # it requires a version of pandas that min versions are not compatible with - run: poetry remove lonboard deltalake + run: poetry remove lonboard deltalake databricks-sql-connector - name: install minimum versions of required deps run: poetry add --lock ${{ join(matrix.backend.deps.required, ' ') }} --python="==${{ steps.install_python.outputs.python-version }}" @@ -715,7 +719,7 @@ jobs: - name: remove lonboard # it requires a version of pandas that pyspark is not compatible with - run: poetry remove lonboard + run: poetry remove lonboard databricks-sql-connector - name: install exact versions of pyspark, pandas and numpy run: poetry add --lock 'pyspark@${{ matrix.pyspark-version }}' ${{ join(matrix.deps, ' ') }} diff --git a/ci/schema/databricks.sql b/ci/schema/databricks.sql new file mode 100644 index 000000000000..a2c1bb3fb610 --- /dev/null +++ b/ci/schema/databricks.sql @@ -0,0 +1,63 @@ +CREATE VIEW IF NOT EXISTS diamonds AS +SELECT * FROM parquet.`/Volumes/ibis_testing/default/testing_data/parquet/diamonds.parquet`; + +CREATE VIEW IF NOT EXISTS batting AS +SELECT * FROM parquet.`/Volumes/ibis_testing/default/testing_data/parquet/batting.parquet`; + +CREATE VIEW IF NOT EXISTS awards_players AS +SELECT * FROM parquet.`/Volumes/ibis_testing/default/testing_data/parquet/awards_players.parquet`; + +CREATE VIEW IF NOT EXISTS functional_alltypes AS +SELECT * FROM parquet.`/Volumes/ibis_testing/default/testing_data/parquet/functional_alltypes.parquet`; + +CREATE VIEW IF NOT EXISTS astronauts AS +SELECT * FROM parquet.`/Volumes/ibis_testing/default/testing_data/parquet/astronauts.parquet`; + +CREATE TABLE IF NOT EXISTS `array_types` AS + VALUES (ARRAY(CAST(1 AS BIGINT), 2, 3), ARRAY('a', 'b', 'c'), ARRAY(1.0, 2.0, 3.0), 'a', 1.0, ARRAY(ARRAY(), ARRAY(CAST(1 AS BIGINT), 2, 3), NULL)), + (ARRAY(4, 5), ARRAY('d', 'e'), ARRAY(4.0, 5.0), 'a', 2.0, ARRAY()), + (ARRAY(6, NULL), ARRAY('f', NULL), ARRAY(6.0, NULL), 'a', 3.0, ARRAY(NULL, ARRAY(), NULL)), + (ARRAY(NULL, 1, NULL), ARRAY(NULL, 'a', NULL), ARRAY(), 'b', 4.0, ARRAY(ARRAY(1), ARRAY(2), ARRAY(), ARRAY(3, 4, 5))), + (ARRAY(2, NULL, 3), ARRAY('b', NULL, 'c'), NULL, 'b', 5.0, NULL), + (ARRAY(4, NULL, NULL, 5), ARRAY('d', NULL, NULL, 'e'), ARRAY(4.0, NULL, NULL, 5.0), 'c', 6.0, ARRAY(ARRAY(1, 2, 3))) + AS (`x`, `y`, `z`, `grouper`, `scalar_column`, `multi_dim`); + +CREATE TABLE IF NOT EXISTS `map` AS + VALUES (CAST(1 AS BIGINT), map('a', CAST(1 AS BIGINT), 'b', 2, 'c', 3)), + (2, map('d', 4, 'e', 5, 'f', 6)) AS (`idx`, `kv`); + +CREATE TABLE IF NOT EXISTS `struct` AS + VALUES (named_struct('a', 1.0, 'b', 'banana', 'c', CAST(2 AS BIGINT))), + (named_struct('a', 2.0, 'b', 'apple', 'c', 3)), + (named_struct('a', 3.0, 'b', 'orange', 'c', 4)), + (named_struct('a', NULL, 'b', 'banana', 'c', 2)), + (named_struct('a', 2.0, 'b', NULL, 'c', 3)), + (NULL), + (named_struct('a', 3.0, 'b', 'orange', 'c', NULL)) AS (`abc`); + +CREATE TABLE IF NOT EXISTS `json_t` AS + VALUES (CAST(1 AS BIGINT), parse_json('{"a": [1,2,3,4], "b": 1}')), + (2, parse_json('{"a":null,"b":2}')), + (3, parse_json('{"a":"foo", "c":null}')), + (4, parse_json('null')), + (5, parse_json('[42,47,55]')), + (6, parse_json('[]')), + (7, parse_json('"a"')), + (8, parse_json('""')), + (9, parse_json('"b"')), + (10, NULL), + (11, parse_json('true')), + (12, parse_json('false')), + (13, parse_json('42')), + (14, parse_json('37.37')) AS (`rowid`, `js`); + +CREATE TABLE IF NOT EXISTS `win` AS +VALUES 
+ ('a', CAST(0 AS BIGINT), CAST(3 AS BIGINT)), + ('a', 1, 2), + ('a', 2, 0), + ('a', 3, 1), + ('a', 4, 1) AS (`g`, `x`, `y`); + +CREATE TABLE IF NOT EXISTS `topk` AS +VALUES (CAST(1 AS BIGINT)), (1), (NULL) AS (`x`); diff --git a/ibis/backends/__init__.py b/ibis/backends/__init__.py index f1c06f1a4adb..324877383029 100644 --- a/ibis/backends/__init__.py +++ b/ibis/backends/__init__.py @@ -819,7 +819,6 @@ class BaseBackend(abc.ABC, _FileIOHandler, CacheHandler): supports_temporary_tables = False supports_python_udfs = False - supports_in_memory_tables = True def __init__(self, *args, **kwargs): self._con_args: tuple[Any] = args @@ -1083,23 +1082,19 @@ def _register_in_memory_tables(self, expr: ir.Expr) -> None: memtable, self._finalize_in_memory_table, memtable.name ) + @abc.abstractmethod def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: - if self.supports_in_memory_tables: - raise NotImplementedError( - f"{self.name} must implement `_register_in_memory_table` to support in-memory tables" - ) + """Register an in-memory table associated with `op`.""" + + @abc.abstractmethod + def _finalize_memtable(self, name: str) -> None: + """Clean up a memtable named `name`.""" def _finalize_in_memory_table(self, name: str) -> None: """Wrap `_finalize_memtable` to suppress exceptions.""" with contextlib.suppress(Exception): self._finalize_memtable(name) - def _finalize_memtable(self, name: str) -> None: - if self.supports_in_memory_tables: - raise NotImplementedError( - f"{self.name} must implement `_finalize_memtable` to support in-memory tables" - ) - def _run_pre_execute_hooks(self, expr: ir.Expr) -> None: """Backend-specific hooks to run before an expression is executed.""" self._register_udfs(expr) diff --git a/ibis/backends/clickhouse/__init__.py b/ibis/backends/clickhouse/__init__.py index be3334373594..c8e2ac202ec3 100644 --- a/ibis/backends/clickhouse/__init__.py +++ b/ibis/backends/clickhouse/__init__.py @@ -63,6 +63,12 @@ class Options(ibis.config.Config): bool_type: Literal["Bool", "UInt8", "Int8"] = "Bool" + def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: + """No-op.""" + + def _finalize_memtable(self, name: str) -> None: + """No-op.""" + def _from_url(self, url: ParseResult, **kwargs) -> BaseBackend: """Connect to a backend using a URL `url`. 
diff --git a/ibis/backends/databricks/__init__.py b/ibis/backends/databricks/__init__.py new file mode 100644 index 000000000000..6e4a3ada29ff --- /dev/null +++ b/ibis/backends/databricks/__init__.py @@ -0,0 +1,636 @@ +"""Databricks backend.""" + +from __future__ import annotations + +import contextlib +import functools +import os +import tempfile +from pathlib import Path +from typing import TYPE_CHECKING, Any + +import databricks.sql +import pyarrow as pa +import pyarrow_hotfix # noqa: F401 +import sqlglot as sg +import sqlglot.expressions as sge + +import ibis +import ibis.backends.sql.compilers as sc +import ibis.common.exceptions as exc +import ibis.expr.operations as ops +import ibis.expr.schema as sch +import ibis.expr.types as ir +from ibis import util +from ibis.backends import CanCreateDatabase, UrlFromPath +from ibis.backends.sql import SQLBackend +from ibis.backends.sql.compilers.base import STAR, AlterTable, C + +if TYPE_CHECKING: + from collections.abc import Mapping + + import pandas as pd + import polars as pl + + from ibis.expr.schema import SchemaLike + + +class Backend(SQLBackend, CanCreateDatabase, UrlFromPath): + name = "databricks" + compiler = sc.databricks.compiler + + @property + def current_catalog(self) -> str: + with self._safe_raw_sql(sg.select(self.compiler.f.current_catalog())) as cur: + [(db,)] = cur.fetchall() + return db + + @property + def current_database(self) -> str: + with self._safe_raw_sql(sg.select(self.compiler.f.current_database())) as cur: + [(db,)] = cur.fetchall() + return db + + def raw_sql(self, query: str | sg.Expression, **kwargs: Any) -> Any: + with contextlib.suppress(AttributeError): + query = query.sql(self.dialect) + cur = self.con.cursor() + try: + cur.execute(query, **kwargs) + except Exception: + cur.close() + raise + return cur + + def create_table( + self, + name: str, + obj: ir.Table + | pd.DataFrame + | pa.Table + | pl.DataFrame + | pl.LazyFrame + | None = None, + *, + schema: SchemaLike | None = None, + database: str | None = None, + temp: bool = False, + overwrite: bool = False, + using: str = "delta", + location: str | None = None, + tblproperties: Mapping[str, str] | None = None, + ): + """Create a table in Databricks. + + Parameters + ---------- + name + Name of the table to create + obj + The data with which to populate the table; optional, but at least + one of `obj` or `schema` must be specified + schema + The schema of the table to create; optional, but at least one of + `obj` or `schema` must be specified + database + The name of the database in which to create the table; if not + passed, the current database is used. + + For multi-level table hierarchies, you can pass in a dotted string + path like `"catalog.database"` or a tuple of strings like + `("catalog", "database")`. 
+ temp + Create a temporary table + overwrite + If `True`, replace the table if it already exists, otherwise fail + if the table exists + using + Data source format + location + Storage location for the table + tblproperties + Table properties + """ + if temp: + raise exc.UnsupportedOperationError("Temporary tables not yet supported") + + table_loc = self._to_sqlglot_table(database) + + catalog = table_loc.catalog or self.current_catalog + database = table_loc.db or self.current_database + + if obj is None and schema is None: + raise ValueError("Either `obj` or `schema` must be specified") + if schema is not None: + schema = ibis.schema(schema) + + properties = [sge.FileFormatProperty(this=self.compiler.v[using.upper()])] + + if location is not None: + properties.append(sge.LocationProperty(this=sge.convert(location))) + + for key, value in (tblproperties or {}).items(): + properties.append( + sge.Property(this=sge.convert(str(key)), value=sge.convert(str(value))) + ) + + if obj is not None: + if not isinstance(obj, ir.Expr): + table = ibis.memtable(obj) + else: + table = obj + + self._run_pre_execute_hooks(table) + + query = self.compiler.to_sqlglot(table) + else: + query = None + + if overwrite: + temp_name = util.gen_name("databricks_table") + else: + temp_name = name + + quoted = self.compiler.quoted + dialect = self.dialect + + initial_table = sg.table(temp_name, catalog=catalog, db=database, quoted=quoted) + target = sge.Schema( + this=initial_table, + expressions=(schema or table.schema()).to_sqlglot(dialect), + ) + + properties = sge.Properties(expressions=properties) + create_stmt = sge.Create(kind="TABLE", this=target, properties=properties) + + # This is the same table as initial_table unless overwrite == True + final_table = sg.table(name, catalog=catalog, db=database, quoted=quoted) + with self._safe_raw_sql(create_stmt) as cur: + if query is not None: + insert_stmt = sge.insert(query, into=initial_table).sql(dialect) + cur.execute(insert_stmt).fetchall() + + if overwrite: + cur.execute( + sge.Drop(kind="TABLE", this=final_table, exists=True).sql(dialect) + ) + if temp: + cur.execute( + sge.Create( + kind="TABLE", + this=final_table, + expression=sg.select(STAR).from_(initial_table), + properties=properties, + ).sql(dialect) + ) + cur.execute( + sge.Drop(kind="TABLE", this=initial_table, exists=True).sql( + dialect + ) + ) + else: + cur.execute( + AlterTable( + this=initial_table, + actions=[sge.RenameTable(this=final_table)], + ).sql(dialect) + ) + + return self.table(name, database=(catalog, database)) + + def table(self, name: str, database: str | None = None) -> ir.Table: + """Construct a table expression. + + Parameters + ---------- + name + Table name + database + Database name + + Returns + ------- + Table + Table expression + + """ + table_loc = self._to_sqlglot_table(database) + + # TODO: set these to better defaults + catalog = table_loc.catalog or None + database = table_loc.db or None + + table_schema = self.get_schema(name, catalog=catalog, database=database) + return ops.DatabaseTable( + name, + schema=table_schema, + source=self, + namespace=ops.Namespace(catalog=catalog, database=database), + ).to_expr() + + def get_schema( + self, + table_name: str, + *, + catalog: str | None = None, + database: str | None = None, + ) -> sch.Schema: + """Compute the schema of a `table`. + + Parameters + ---------- + table_name + May **not** be fully qualified. Use `database` if you want to + qualify the identifier. 
+ catalog + Catalog name + database + Database name + + Returns + ------- + sch.Schema + Ibis schema + """ + table = sg.table( + table_name, db=database, catalog=catalog, quoted=self.compiler.quoted + ) + sql = sge.Describe(kind="TABLE", this=table).sql(self.dialect) + try: + with self.con.cursor() as cur: + out = cur.execute(sql).fetchall_arrow() + except databricks.sql.exc.ServerOperationError as e: + raise exc.TableNotFound( + f"Table {table_name!r} not found in " + f"{catalog or self.current_catalog}.{database or self.current_database}" + ) from e + + names = out["col_name"].to_pylist() + types = out["data_type"].to_pylist() + + return sch.Schema( + dict(zip(names, map(self.compiler.type_mapper.from_string, types))) + ) + + @contextlib.contextmanager + def _safe_raw_sql(self, query, *args, **kwargs): + with contextlib.suppress(AttributeError): + query = query.sql(self.dialect) + with self.con.cursor() as cur: + yield cur.execute(query, *args, **kwargs) + + def list_catalogs(self, like: str | None = None) -> list[str]: + with self.con.cursor() as cur: + out = cur.catalogs().fetchall_arrow() + return self._filter_with_like(out["TABLE_CAT"].to_pylist(), like) + + def list_databases( + self, like: str | None = None, catalog: str | None = None + ) -> list[str]: + with self.con.cursor() as cur: + out = cur.schemas( + catalog_name=catalog or self.current_catalog + ).fetchall_arrow() + return self._filter_with_like(out["TABLE_SCHEM"].to_pylist(), like=like) + + @functools.cached_property + def version(self) -> str: + query = sg.select(self.compiler.f.current_version()) + with self._safe_raw_sql(query) as cur: + [(version_info,)] = cur.fetchall() + return version_info["dbsql_version"] + + def do_connect( + self, + *, + server_hostname: str | None = None, + http_path: str | None = None, + access_token: str | None = None, + auth_type: str | None = None, + credentials_provider: str | None = None, + password: str | None = None, + username: str | None = None, + session_configuration: Mapping[str, str] | None = None, + http_headers: list[tuple[str, str]] | None = None, + catalog: str | None = None, + schema: str = "default", + use_cloud_fetch: bool = False, + memtable_volume: str | None = "__ibis_memtables__", + staging_allowed_local_path: str | None = None, + **config: Any, + ) -> None: + """Create an Ibis client connected to a Databricks cloud instance.""" + if staging_allowed_local_path is None: + staging_allowed_local_path = tempfile.gettempdir() + self.con = databricks.sql.connect( + server_hostname=( + server_hostname or os.environ.get("DATABRICKS_SERVER_HOSTNAME") + ), + http_path=http_path or os.environ.get("DATABRICKS_HTTP_PATH"), + access_token=access_token or os.environ.get("DATABRICKS_TOKEN"), + auth_type=auth_type, + credentials_provider=credentials_provider, + password=password, + username=username, + session_configuration=session_configuration, + http_headers=http_headers, + catalog=catalog, + schema=schema, + use_cloud_fetch=use_cloud_fetch, + staging_allowed_local_path=staging_allowed_local_path, + **config, + ) + self._memtable_volume = memtable_volume + self._memtable_catalog = self.current_catalog + self._memtable_database = self.current_database + self._post_connect(memtable_volume=memtable_volume) + + @contextlib.contextmanager + def begin(self): + with self.con.cursor() as cur: + yield cur + + @util.experimental + @classmethod + def from_connection( + cls, con, memtable_volume: str = "__ibis_memtables__" + ) -> Backend: + """Create an Ibis client from an existing connection to 
a Databricks cloud instance. + + Parameters + ---------- + con + An existing connection to a Databricks database. + memtable_volume + The volume to use for Ibis memtables. + """ + new_backend = cls() + new_backend._can_reconnect = False + new_backend.con = con + new_backend._post_connect(memtable_volume=memtable_volume) + return new_backend + + def _post_connect(self, *, memtable_volume: str) -> None: + sql = f"CREATE VOLUME IF NOT EXISTS {memtable_volume} COMMENT 'Ibis memtable storage volume'" + with self.con.cursor() as cur: + cur.execute(sql) + + @functools.cached_property + def _memtable_volume_path(self) -> str: + return f"/Volumes/{self._memtable_catalog}/{self._memtable_database}/{self._memtable_volume}" + + def _in_memory_table_exists(self, name: str) -> bool: + sql = ( + sg.select(self.compiler.f.count(STAR)) + .from_( + sg.table("views", db="information_schema", catalog=self.current_catalog) + ) + .where( + C.table_name.eq(sge.convert(name)), + C.table_schema.eq(self.compiler.f.current_database()), + ) + ) + with self._safe_raw_sql(sql) as cur: + [(out,)] = cur.fetchall() + + assert 0 <= out <= 1, str(out) + return out == 1 + + def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: + import pyarrow.parquet as pq + + quoted = self.compiler.quoted + name = op.name + stem = f"{name}.parquet" + + upstream_path = f"{self._memtable_volume_path}/{stem}" + sql = sge.Create( + kind="VIEW", + this=sg.table( + name, + db=self.current_database, + catalog=self.current_catalog, + quoted=quoted, + ), + expression=sge.select(STAR).from_( + sg.table(upstream_path, db="parquet", quoted=quoted) + ), + ).sql(self.dialect) + with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir: + data = op.data.to_pyarrow(schema=op.schema) + path = Path(tmpdir, stem) + put_into = f"PUT '{path}' INTO '{upstream_path}' OVERWRITE" + # optimize for bandwidth so use zstd which typically compresses + # better than the other options without much loss in speed + pq.write_table(data, path, compression="zstd") + with self.con.cursor() as cur: + cur.execute(put_into) + cur.execute(sql) + + def _finalize_memtable(self, name: str) -> None: + path = f"{self._memtable_volume_path}/{name}.parquet" + sql = sge.Drop( + kind="VIEW", + this=sg.to_identifier(name, quoted=self.compiler.quoted), + exists=True, + ).sql(self.dialect) + with self.con.cursor() as cur: + cur.execute(sql) + cur.execute(f"REMOVE '{path}'") + + def create_database( + self, name: str, catalog: str | None = None, force: bool = False + ) -> None: + name = sg.table(name, catalog=catalog, quoted=self.compiler.quoted) + with self._safe_raw_sql(sge.Create(this=name, kind="SCHEMA", replace=force)): + pass + + def drop_database( + self, name: str, catalog: str | None = None, force: bool = False + ) -> None: + name = sg.table(name, catalog=catalog, quoted=self.compiler.quoted) + with self._safe_raw_sql(sge.Drop(this=name, kind="SCHEMA", replace=force)): + pass + + def list_tables( + self, + like: str | None = None, + database: tuple[str, str] | str | None = None, + ) -> list[str]: + """List tables and views. + + ::: {.callout-note} + ## Ibis does not use the word `schema` to refer to database hierarchy. + + A collection of tables is referred to as a `database`. + A collection of `database` is referred to as a `catalog`. + + These terms are mapped onto the corresponding features in each + backend (where available), regardless of whether the backend itself + uses the same terminology. 
+        :::
+
+        Parameters
+        ----------
+        like
+            Regex to filter by table/view name.
+        database
+            Database location. If not passed, uses the current database.
+
+            By default uses the current `database` (`self.current_database`) and
+            `catalog` (`self.current_catalog`).
+
+            To specify a table in a separate catalog, you can pass in the
+            catalog and database as a string `"catalog.database"`, or as a tuple of
+            strings `("catalog", "database")`.
+
+        Returns
+        -------
+        list[str]
+            List of table and view names.
+
+        Examples
+        --------
+        >>> import ibis
+        >>> con = ibis.databricks.connect()
+        >>> foo = con.create_table("foo", schema=ibis.schema(dict(a="int")))
+        >>> con.list_tables()
+        ['foo']
+        >>> bar = con.create_view("bar", foo)
+        >>> con.list_tables()
+        ['bar', 'foo']
+        >>> con.create_database("my_database")
+        >>> con.list_tables(database="my_database")
+        []
+        >>> con.raw_sql("CREATE TABLE my_database.baz (a INTEGER)")  # doctest: +ELLIPSIS
+        <... object at 0x...>
+        >>> con.list_tables(database="my_database")
+        ['baz']
+
+        """
+        table_loc = self._to_sqlglot_table(database)
+
+        catalog = table_loc.catalog or self.current_catalog
+        database = table_loc.db or self.current_database
+
+        with self.con.cursor() as cur:
+            cur.tables(catalog_name=catalog, schema_name=database)
+            out = cur.fetchall_arrow()
+
+        return self._filter_with_like(out["TABLE_NAME"].to_pylist(), like)
+
+    def to_pyarrow_batches(
+        self,
+        expr: ir.Expr,
+        *,
+        params: Mapping[ir.Scalar, Any] | None = None,
+        limit: int | str | None = None,
+        chunk_size: int = 1_000_000,
+        **_: Any,
+    ) -> pa.ipc.RecordBatchReader:
+        """Return a stream of record batches.
+
+        The returned `RecordBatchReader` contains a cursor with an unbounded lifetime.
+
+        For analytics use cases this is usually nothing to fret about. In some cases you
+        may need to explicitly release the cursor.
+ + Parameters + ---------- + expr + Ibis expression + params + Bound parameters + limit + Limit the result to this number of rows + chunk_size + The number of rows to fetch per batch + """ + self._run_pre_execute_hooks(expr) + table = expr.as_table() + sql = self.compile(table, limit=limit, params=params) + + def batch_producer(con, sql): + with con.cursor() as cur: + batched_cur = cur.execute(sql) + while batch := batched_cur.fetchmany_arrow(size=chunk_size): + yield from batch.to_batches() + + pyarrow_schema = expr.as_table().schema().to_pyarrow() + producer = batch_producer(self.con, sql) + return pa.ipc.RecordBatchReader.from_batches(pyarrow_schema, producer) + + def to_pyarrow( + self, + expr: ir.Expr, + *, + params: Mapping[ir.Scalar, Any] | None = None, + limit: int | str | None = None, + **kwargs: Any, + ) -> pa.Table: + self._run_pre_execute_hooks(expr) + + sql = self.compile(expr, limit=limit, params=params, **kwargs) + with self._safe_raw_sql(sql) as cur: + res = cur.fetchall_arrow() + + target_schema = expr.as_table().schema().to_pyarrow() + if res is None: + res = target_schema.empty_table() + + return expr.__pyarrow_result__(res) + + def _fetch_from_cursor(self, cursor, schema: sch.Schema) -> pd.DataFrame: + if (table := cursor.fetchall_arrow()) is None: + table = schema.to_pyarrow().empty_table() + df = table.to_pandas(timestamp_as_object=True) + df.columns = list(schema.names) + return df + + def _get_schema_using_query(self, query: str) -> sch.Schema: + with self._safe_raw_sql( + sge.Describe(this=sg.parse_one(query, read=self.dialect)) + ) as cur: + rows = cur.fetchall_arrow() + + rows = rows.to_pydict() + + type_mapper = self.compiler.type_mapper + return sch.Schema( + { + name: type_mapper.from_string(typ, nullable=True) + for name, typ in zip(rows["col_name"], rows["data_type"]) + } + ) + + def _get_temp_view_definition(self, name: str, definition: str) -> str: + return sge.Create( + this=sg.to_identifier(name, quoted=self.compiler.quoted), + kind="VIEW", + expression=definition, + replace=True, + properties=sge.Properties(expressions=[sge.TemporaryProperty()]), + ) + + def _create_temp_view(self, table_name, source): + with self._safe_raw_sql(self._get_temp_view_definition(table_name, source)): + pass + + def rename_table(self, old_name: str, new_name: str) -> None: + """Rename an existing table. + + Parameters + ---------- + old_name + The old name of the table. + new_name + The new name of the table. 
+ + """ + old = sg.table(old_name, quoted=True) + new = sg.table(new_name, quoted=True) + query = AlterTable( + this=old, exists=False, actions=[sge.RenameTable(this=new, exists=True)] + ) + with self._safe_raw_sql(query): + pass diff --git a/ibis/backends/databricks/tests/__init__.py b/ibis/backends/databricks/tests/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/ibis/backends/databricks/tests/conftest.py b/ibis/backends/databricks/tests/conftest.py new file mode 100644 index 000000000000..5ad70043e2d4 --- /dev/null +++ b/ibis/backends/databricks/tests/conftest.py @@ -0,0 +1,57 @@ +from __future__ import annotations + +import concurrent.futures +from os import environ as env +from typing import TYPE_CHECKING, Any + +import ibis +from ibis.backends.tests.base import BackendTest + +if TYPE_CHECKING: + from ibis.backends import BaseBackend + + +def put_into(con, query): + with con.cursor() as cur: + cur.execute(query) + + +class TestConf(BackendTest): + supports_map = True + driver_supports_multiple_statements = False + deps = ("databricks.sql",) + + def _load_data(self, **_: Any) -> None: + import databricks.sql + + files = list(self.data_dir.joinpath("parquet").glob("*.parquet")) + volume_prefix = "/Volumes/ibis_testing/default/testing_data/parquet" + with ( + concurrent.futures.ThreadPoolExecutor() as exe, + databricks.sql.connect( + server_hostname=env["DATABRICKS_SERVER_HOSTNAME"], + http_path=env["DATABRICKS_HTTP_PATH"], + access_token=env["DATABRICKS_TOKEN"], + staging_allowed_local_path=str(self.data_dir), + ) as con, + ): + for fut in concurrent.futures.as_completed( + exe.submit( + put_into, + con, + f"PUT '{file}' INTO '{volume_prefix}/{file.name}' OVERWRITE", + ) + for file in files + ): + fut.result() + + @staticmethod + def connect(*, tmpdir, worker_id, **kw) -> BaseBackend: + return ibis.databricks.connect( + server_hostname=env["DATABRICKS_SERVER_HOSTNAME"], + http_path=env["DATABRICKS_HTTP_PATH"], + access_token=env["DATABRICKS_TOKEN"], + catalog="ibis_testing", + schema="default", + **kw, + ) diff --git a/ibis/backends/flink/__init__.py b/ibis/backends/flink/__init__.py index 97ea8a06a8f5..78d6407a338b 100644 --- a/ibis/backends/flink/__init__.py +++ b/ibis/backends/flink/__init__.py @@ -49,6 +49,12 @@ class Backend(SQLBackend, CanCreateDatabase, NoUrl): supports_temporary_tables = True supports_python_udfs = True + def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: + """No-op.""" + + def _finalize_memtable(self, name: str) -> None: + """No-op.""" + @property def dialect(self): # TODO: remove when ported to sqlglot diff --git a/ibis/backends/mssql/__init__.py b/ibis/backends/mssql/__init__.py index d424d903179e..0ca85df5bbab 100644 --- a/ibis/backends/mssql/__init__.py +++ b/ibis/backends/mssql/__init__.py @@ -682,7 +682,7 @@ def create_table( raw_table = sg.table(temp_name, catalog=catalog, db=db, quoted=False) target = sge.Schema( this=sg.table( - "#" * temp + temp_name, catalog=catalog, db=db, quoted=quoted + "#" * bool(temp) + temp_name, catalog=catalog, db=db, quoted=quoted ), expressions=schema.to_sqlglot(self.dialect), ) @@ -701,7 +701,7 @@ def create_table( # for the subsequent `Insert`, so we need to shove a `#` in # front of the table identifier. 
_table = sg.table( - "##" * temp + temp_name, + "##" * bool(temp) + temp_name, catalog=catalog, db=db, quoted=self.compiler.quoted, diff --git a/ibis/backends/sql/compilers/__init__.py b/ibis/backends/sql/compilers/__init__.py index d2105ed00279..3f876ec9b44e 100644 --- a/ibis/backends/sql/compilers/__init__.py +++ b/ibis/backends/sql/compilers/__init__.py @@ -3,6 +3,7 @@ __all__ = [ "BigQueryCompiler", "ClickHouseCompiler", + "DatabricksCompiler", "DataFusionCompiler", "DruidCompiler", "DuckDBCompiler", @@ -22,6 +23,7 @@ from ibis.backends.sql.compilers.bigquery import BigQueryCompiler from ibis.backends.sql.compilers.clickhouse import ClickHouseCompiler +from ibis.backends.sql.compilers.databricks import DatabricksCompiler from ibis.backends.sql.compilers.datafusion import DataFusionCompiler from ibis.backends.sql.compilers.druid import DruidCompiler from ibis.backends.sql.compilers.duckdb import DuckDBCompiler diff --git a/ibis/backends/sql/compilers/databricks.py b/ibis/backends/sql/compilers/databricks.py new file mode 100644 index 000000000000..1ff62ae70bf6 --- /dev/null +++ b/ibis/backends/sql/compilers/databricks.py @@ -0,0 +1,58 @@ +from __future__ import annotations + +import sqlglot as sg +import sqlglot.expressions as sge + +import ibis.expr.datatypes as dt +import ibis.expr.operations as ops +from ibis.backends.sql.compilers.pyspark import PySparkCompiler +from ibis.backends.sql.dialects import Databricks + + +class DatabricksCompiler(PySparkCompiler): + __slots__ = () + dialect = Databricks + + SIMPLE_OPS = PySparkCompiler.SIMPLE_OPS | { + ops.Divide: "try_divide", + ops.Mode: "mode", + ops.BitAnd: "bit_and", + ops.BitOr: "bit_or", + ops.BitXor: "bit_xor", + ops.TypeOf: "typeof", + } + + UNSUPPORTED_OPS = ( + ops.ElementWiseVectorizedUDF, + ops.AnalyticVectorizedUDF, + ops.ReductionVectorizedUDF, + ops.RowID, + ops.TimestampBucket, + ) + + def visit_NonNullLiteral(self, op, *, value, dtype): + if dtype.is_binary(): + return self.f.unhex(value.hex()) + elif dtype.is_decimal(): + if value.is_finite(): + return self.cast(str(value), dtype) + else: + return self.cast(str(value), dt.float64) + elif dtype.is_uuid(): + return sge.convert(str(value)) + else: + return None + + def visit_Field(self, op, *, rel, name): + return sg.column( + self._gen_valid_name(name), table=rel.alias_or_name, quoted=self.quoted + ) + + def visit_RandomUUID(self, _): + return self.f.uuid() + + def visit_StringSplit(self, op, *, arg, delimiter): + return self.f.anon.split(arg, delimiter) + + +compiler = DatabricksCompiler() diff --git a/ibis/backends/sql/compilers/pyspark.py b/ibis/backends/sql/compilers/pyspark.py index 5587a5186a76..b4b1b28247d3 100644 --- a/ibis/backends/sql/compilers/pyspark.py +++ b/ibis/backends/sql/compilers/pyspark.py @@ -62,7 +62,6 @@ class PySparkCompiler(SQLGlotCompiler): UNSUPPORTED_OPS = ( ops.RowID, ops.TimestampBucket, - ops.RandomUUID, ) LOWERED_OPS = { @@ -689,5 +688,10 @@ def visit_ArraySum(self, op, *, arg): def visit_ArrayMean(self, op, *, arg): return self._array_reduction(dtype=op.dtype, arg=arg, output=operator.truediv) + def visit_RandomUUID(self, _): + raise com.UnsupportedOperationError( + "UUID operation not supported in the PySpark backend" + ) + compiler = PySparkCompiler() diff --git a/ibis/backends/sql/datatypes.py b/ibis/backends/sql/datatypes.py index b76630b75900..deab72cb464c 100644 --- a/ibis/backends/sql/datatypes.py +++ b/ibis/backends/sql/datatypes.py @@ -1231,6 +1231,10 @@ def _from_ibis_Map(cls, dtype: dt.Map) -> sge.DataType: ) +class 
DatabricksType(SqlglotType): + dialect = "databricks" + + TYPE_MAPPERS = { mapper.dialect: mapper for mapper in set(get_subclasses(SqlglotType)) - {SqlglotType, BigQueryUDFType} diff --git a/ibis/backends/sql/dialects.py b/ibis/backends/sql/dialects.py index 8f62c3c57f59..65e2175d7450 100644 --- a/ibis/backends/sql/dialects.py +++ b/ibis/backends/sql/dialects.py @@ -9,6 +9,7 @@ from sqlglot import transforms from sqlglot.dialects import ( TSQL, + Databricks, Hive, MySQL, Oracle, @@ -469,10 +470,19 @@ class Generator(Postgres.Generator): SQLite.Generator.TYPE_MAPPING |= {sge.DataType.Type.BOOLEAN: "BOOLEAN"} - Trino.Generator.TRANSFORMS |= { sge.BitwiseLeftShift: rename_func("bitwise_left_shift"), sge.BitwiseRightShift: rename_func("bitwise_right_shift"), sge.FirstValue: rename_func("first_value"), sge.LastValue: rename_func("last_value"), } + +Databricks.Generator.TRANSFORMS |= { + # required because of https://github.com/tobymao/sqlglot/pull/4142 + sge.Create: transforms.preprocess( + [ + transforms.remove_unique_constraints, + transforms.move_partitioned_by_to_schema_columns, + ] + ) +} diff --git a/ibis/backends/tests/errors.py b/ibis/backends/tests/errors.py index 17bb81b97849..ca540a359a46 100644 --- a/ibis/backends/tests/errors.py +++ b/ibis/backends/tests/errors.py @@ -153,3 +153,10 @@ from pyodbc import ProgrammingError as PyODBCProgrammingError except ImportError: PyODBCProgrammingError = PyODBCDataError = None + +try: + from databricks.sql.exc import ( + ServerOperationError as DatabricksServerOperationError, + ) +except ImportError: + DatabricksServerOperationError = None diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/databricks/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/databricks/out.sql new file mode 100644 index 000000000000..2b7d5f7566bb --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/databricks/out.sql @@ -0,0 +1,20 @@ +WITH `t1` AS ( + SELECT + * + FROM `leaf` AS `t0` + WHERE + TRUE +) +SELECT + `t3`.`key` +FROM `t1` AS `t3` +INNER JOIN `t1` AS `t4` + ON `t3`.`key` = `t4`.`key` +INNER JOIN ( + SELECT + `t3`.`key` + FROM `t1` AS `t3` + INNER JOIN `t1` AS `t4` + ON `t3`.`key` = `t4`.`key` +) AS `t6` + ON `t3`.`key` = `t6`.`key` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/databricks/out.sql b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/databricks/out.sql new file mode 100644 index 000000000000..ac006b1d5f25 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/databricks/out.sql @@ -0,0 +1,22 @@ +SELECT + CASE `t0`.`continent` + WHEN 'NA' + THEN 'North America' + WHEN 'SA' + THEN 'South America' + WHEN 'EU' + THEN 'Europe' + WHEN 'AF' + THEN 'Africa' + WHEN 'AS' + THEN 'Asia' + WHEN 'OC' + THEN 'Oceania' + WHEN 'AN' + THEN 'Antarctica' + ELSE 'Unknown continent' + END AS `cont`, + SUM(`t0`.`population`) AS `total_pop` +FROM `countries` AS `t0` +GROUP BY + 1 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/databricks/out.sql b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/databricks/out.sql new file mode 100644 index 000000000000..d7889c812077 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/databricks/out.sql @@ -0,0 +1,9 @@ +SELECT + `t0`.`x` IN ( + SELECT + * + FROM `t` AS `t0` + WHERE + `t0`.`x` > 2 + ) AS `InSubquery(x)` +FROM `t` AS `t0` \ No newline at end of file diff --git 
a/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/databricks/out.sql b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/databricks/out.sql new file mode 100644 index 000000000000..461eaa7a2517 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/databricks/out.sql @@ -0,0 +1,14 @@ +SELECT + `t1`.`x`, + `t1`.`y` +FROM ( + SELECT + `t0`.`x`, + SUM(`t0`.`x`) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `y` + FROM `t` AS `t0` +) AS `t1` +WHERE + `t1`.`y` <= 37 +QUALIFY + AVG(`t1`.`x`) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) IS NOT NULL + AND NOT ISNAN(AVG(`t1`.`x`) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)) \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_rewrite_context/databricks/out.sql b/ibis/backends/tests/snapshots/test_sql/test_rewrite_context/databricks/out.sql new file mode 100644 index 000000000000..b78291662b87 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_rewrite_context/databricks/out.sql @@ -0,0 +1,4 @@ +SELECT + NTILE(2) OVER (ORDER BY RAND() ASC NULLS LAST) - 1 AS `new_col` +FROM `test` AS `t0` +LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/databricks-subquery/block.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/databricks-subquery/block.sql new file mode 100644 index 000000000000..c9099652a914 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/databricks-subquery/block.sql @@ -0,0 +1,3 @@ +SELECT + * +FROM `test` TABLESAMPLE (50.0 PERCENT) AS `t0` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/databricks-subquery/row.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/databricks-subquery/row.sql new file mode 100644 index 000000000000..c9099652a914 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/databricks-subquery/row.sql @@ -0,0 +1,3 @@ +SELECT + * +FROM `test` TABLESAMPLE (50.0 PERCENT) AS `t0` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/databricks-table/block.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/databricks-table/block.sql new file mode 100644 index 000000000000..27955a82792f --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/databricks-table/block.sql @@ -0,0 +1,9 @@ +SELECT + * +FROM ( + SELECT + * + FROM `test` AS `t0` + WHERE + `t0`.`x` > 10 +) TABLESAMPLE (50.0 PERCENT) AS `t1` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/databricks-table/row.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/databricks-table/row.sql new file mode 100644 index 000000000000..27955a82792f --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/databricks-table/row.sql @@ -0,0 +1,9 @@ +SELECT + * +FROM ( + SELECT + * + FROM `test` AS `t0` + WHERE + `t0`.`x` > 10 +) TABLESAMPLE (50.0 PERCENT) AS `t1` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_selects_with_impure_operations_not_merged/databricks-random/out.sql b/ibis/backends/tests/snapshots/test_sql/test_selects_with_impure_operations_not_merged/databricks-random/out.sql new file mode 100644 index 000000000000..551447b11ff7 --- /dev/null +++ 
b/ibis/backends/tests/snapshots/test_sql/test_selects_with_impure_operations_not_merged/databricks-random/out.sql @@ -0,0 +1,12 @@ +SELECT + `t1`.`x`, + `t1`.`y`, + `t1`.`z`, + IF(`t1`.`y` = `t1`.`z`, 'big', 'small') AS `size` +FROM ( + SELECT + `t0`.`x`, + RAND() AS `y`, + RAND() AS `z` + FROM `t` AS `t0` +) AS `t1` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_selects_with_impure_operations_not_merged/databricks-uuid/out.sql b/ibis/backends/tests/snapshots/test_sql/test_selects_with_impure_operations_not_merged/databricks-uuid/out.sql new file mode 100644 index 000000000000..fb13922470e3 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_selects_with_impure_operations_not_merged/databricks-uuid/out.sql @@ -0,0 +1,12 @@ +SELECT + `t1`.`x`, + `t1`.`y`, + `t1`.`z`, + IF(`t1`.`y` = `t1`.`z`, 'big', 'small') AS `size` +FROM ( + SELECT + `t0`.`x`, + UUID() AS `y`, + UUID() AS `z` + FROM `t` AS `t0` +) AS `t1` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_to_sql_default_backend/databricks/to_sql.sql b/ibis/backends/tests/snapshots/test_sql/test_to_sql_default_backend/databricks/to_sql.sql new file mode 100644 index 000000000000..5585098548e1 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_to_sql_default_backend/databricks/to_sql.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS `CountStar()` +FROM ( + SELECT + * + FROM `mytable` AS `t0` +) AS `t1` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/databricks/out.sql b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/databricks/out.sql new file mode 100644 index 000000000000..cc6373fb6cf7 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/databricks/out.sql @@ -0,0 +1,80 @@ +WITH `t5` AS ( + SELECT + `t4`.`field_of_study`, + FIRST(`t4`.`diff`) IGNORE NULLS AS `diff` + FROM ( + SELECT + `t3`.`field_of_study`, + `t3`.`years`, + `t3`.`degrees`, + `t3`.`earliest_degrees`, + `t3`.`latest_degrees`, + `t3`.`latest_degrees` - `t3`.`earliest_degrees` AS `diff` + FROM ( + SELECT + `t2`.`field_of_study`, + `t2`.`years`, + `t2`.`degrees`, + FIRST(`t2`.`degrees`) IGNORE NULLS OVER (PARTITION BY `t2`.`field_of_study` ORDER BY `t2`.`years` ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `earliest_degrees`, + LAST(`t2`.`degrees`) IGNORE NULLS OVER (PARTITION BY `t2`.`field_of_study` ORDER BY `t2`.`years` ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `latest_degrees` + FROM ( + SELECT + `t1`.`field_of_study`, + `t1`.`__pivoted__`.`years` AS `years`, + `t1`.`__pivoted__`.`degrees` AS `degrees` + FROM ( + SELECT + `t0`.`field_of_study`, + EXPLODE( + ARRAY( + STRUCT('1970-71' AS `years`, `t0`.`1970-71` AS `degrees`), + STRUCT('1975-76' AS `years`, `t0`.`1975-76` AS `degrees`), + STRUCT('1980-81' AS `years`, `t0`.`1980-81` AS `degrees`), + STRUCT('1985-86' AS `years`, `t0`.`1985-86` AS `degrees`), + STRUCT('1990-91' AS `years`, `t0`.`1990-91` AS `degrees`), + STRUCT('1995-96' AS `years`, `t0`.`1995-96` AS `degrees`), + STRUCT('2000-01' AS `years`, `t0`.`2000-01` AS `degrees`), + STRUCT('2005-06' AS `years`, `t0`.`2005-06` AS `degrees`), + STRUCT('2010-11' AS `years`, `t0`.`2010-11` AS `degrees`), + STRUCT('2011-12' AS `years`, `t0`.`2011-12` AS `degrees`), + STRUCT('2012-13' AS `years`, `t0`.`2012-13` AS `degrees`), + STRUCT('2013-14' AS `years`, `t0`.`2013-14` AS `degrees`), + STRUCT('2014-15' AS `years`, `t0`.`2014-15` AS 
`degrees`), + STRUCT('2015-16' AS `years`, `t0`.`2015-16` AS `degrees`), + STRUCT('2016-17' AS `years`, `t0`.`2016-17` AS `degrees`), + STRUCT('2017-18' AS `years`, `t0`.`2017-18` AS `degrees`), + STRUCT('2018-19' AS `years`, `t0`.`2018-19` AS `degrees`), + STRUCT('2019-20' AS `years`, `t0`.`2019-20` AS `degrees`) + ) + ) AS `__pivoted__` + FROM `humanities` AS `t0` + ) AS `t1` + ) AS `t2` + ) AS `t3` + ) AS `t4` + GROUP BY + 1 +) +SELECT + * +FROM ( + SELECT + * + FROM `t5` AS `t6` + ORDER BY + `t6`.`diff` DESC + LIMIT 10 +) AS `t9` +UNION ALL +SELECT + * +FROM ( + SELECT + * + FROM `t5` AS `t6` + WHERE + `t6`.`diff` < 0 + ORDER BY + `t6`.`diff` ASC NULLS LAST + LIMIT 10 +) AS `t10` \ No newline at end of file diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index 2ff92c14f361..8ce97d26fb5d 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -14,6 +14,7 @@ from ibis import literal as L from ibis.backends.tests.errors import ( ClickHouseDatabaseError, + DatabricksServerOperationError, ExaQueryError, GoogleBadRequest, ImpalaHiveServer2Error, @@ -67,6 +68,7 @@ def mean_udf(s): "oracle", "flink", "exasol", + "databricks", ], raises=com.OperationNotDefinedError, ), @@ -215,6 +217,7 @@ def test_aggregate_grouped(backend, alltypes, df, result_fn, expected_fn): "oracle", "flink", "exasol", + "databricks", ], raises=com.OperationNotDefinedError, ) @@ -592,7 +595,7 @@ def test_reduction_ops( reason="`include_null=True` is not supported", ), pytest.mark.notimpl( - ["bigquery", "pyspark"], + ["bigquery", "pyspark", "databricks"], raises=com.UnsupportedOperationError, reason="Can't mix `where` and `include_null=True`", strict=False, @@ -629,7 +632,7 @@ def test_first_last(alltypes, method, filtered, include_null): @pytest.mark.notimpl( - ["clickhouse", "exasol", "flink", "pyspark", "sqlite"], + ["clickhouse", "exasol", "flink", "pyspark", "sqlite", "databricks"], raises=com.UnsupportedOperationError, ) @pytest.mark.notimpl( @@ -658,7 +661,7 @@ def test_first_last(alltypes, method, filtered, include_null): reason="`include_null=True` is not supported", ), pytest.mark.notimpl( - ["bigquery", "pyspark"], + ["bigquery"], raises=com.UnsupportedOperationError, reason="Can't mix `where` and `include_null=True`", strict=False, @@ -1176,6 +1179,11 @@ def test_median(alltypes, df): @pytest.mark.notyet( ["pyspark"], raises=AssertionError, reason="pyspark returns null for string median" ) +@pytest.mark.notyet( + ["databricks"], + raises=DatabricksServerOperationError, + reason="percentile of string is not allowed", +) @pytest.mark.notyet( ["snowflake"], raises=SnowflakeProgrammingError, @@ -1237,6 +1245,11 @@ def test_string_quantile(alltypes, func): @pytest.mark.notyet( ["polars"], raises=PolarsInvalidOperationError, reason="not supported upstream" ) +@pytest.mark.notyet( + ["databricks"], + raises=DatabricksServerOperationError, + reason="percentile of string is not allowed", +) def test_date_quantile(alltypes): expr = alltypes.timestamp_col.date().quantile(0.5) result = expr.execute() @@ -1316,7 +1329,16 @@ def test_group_concat( @pytest.mark.notimpl( - ["clickhouse", "datafusion", "druid", "flink", "impala", "pyspark", "sqlite"], + [ + "clickhouse", + "datafusion", + "druid", + "flink", + "impala", + "pyspark", + "sqlite", + "databricks", + ], raises=com.UnsupportedOperationError, ) @pytest.mark.parametrize("filtered", [False, True]) @@ -1343,11 +1365,13 @@ def gen_test_collect_marks(distinct, filtered, ordered, 
include_null): yield pytest.mark.notimpl(["datafusion"], raises=com.UnsupportedOperationError) if ordered: yield pytest.mark.notimpl( - ["clickhouse", "pyspark", "flink"], raises=com.UnsupportedOperationError + ["clickhouse", "pyspark", "flink", "databricks"], + raises=com.UnsupportedOperationError, ) if include_null: yield pytest.mark.notimpl( - ["clickhouse", "pyspark", "snowflake"], raises=com.UnsupportedOperationError + ["clickhouse", "pyspark", "snowflake", "databricks"], + raises=com.UnsupportedOperationError, ) # Handle special cases @@ -1480,6 +1504,7 @@ def test_topk_filter_op(con, alltypes, df, result_fn, expected_fn): "oracle", "exasol", "flink", + "databricks", ], raises=com.OperationNotDefinedError, ) @@ -1528,6 +1553,7 @@ def test_aggregate_list_like(backend, alltypes, df, agg_fn): "flink", "exasol", "flink", + "databricks", ], raises=com.OperationNotDefinedError, ) @@ -1646,6 +1672,7 @@ def test_grouped_case(backend, con): @pytest.mark.notyet(["pyspark"], raises=PySparkAnalysisException) @pytest.mark.notyet(["mssql"], raises=PyODBCProgrammingError) @pytest.mark.notyet(["risingwave"], raises=AssertionError, strict=False) +@pytest.mark.notyet(["databricks"], raises=DatabricksServerOperationError) def test_group_concat_over_window(backend, con): # TODO: this test is flaky on risingwave and I DO NOT LIKE IT input_df = pd.DataFrame( diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py index 1da572428f20..8810b9eee40e 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -18,6 +18,7 @@ import ibis.expr.types as ir from ibis.backends.tests.errors import ( ClickHouseDatabaseError, + DatabricksServerOperationError, GoogleBadRequest, MySQLOperationalError, PolarsComputeError, @@ -451,6 +452,11 @@ def test_array_slice(backend, start, stop): raises=AssertionError, reason="somehow, transformed results are different types", ), + pytest.mark.notyet( + ["databricks"], + raises=AssertionError, + reason="nulls come back as NaN", + ), ], id="nulls", ), @@ -504,6 +510,11 @@ def test_array_map(con, input, output, func): raises=AssertionError, reason="somehow, transformed results are different types", ), + pytest.mark.notimpl( + ["databricks"], + raises=AssertionError, + reason="nans instead of nulls", + ), ], id="nulls", ), @@ -791,6 +802,9 @@ def test_array_remove(con, input, expected): raises=AssertionError, reason="somehow, transformed results are different types", ), + pytest.mark.notimpl( + ["databricks"], raises=AssertionError, reason="nulls are nans" + ), ], ), param( @@ -869,6 +883,9 @@ def test_array_sort(con, data): raises=AssertionError, reason="somehow, transformed results are different types", ), + pytest.mark.notimpl( + ["databricks"], raises=AssertionError, reason="nulls are nans" + ), ], ), param( @@ -1045,6 +1062,12 @@ def test_zip_null(con, fn): reason="pyspark doesn't seem to support field selection on explode", raises=PySparkAnalysisException, ) +@pytest.mark.notimpl( + ["databricks"], + reason="databricks supports about 4 ways to explode, and " + "sqlglot doesn't implement the one that would enable this operation", + raises=DatabricksServerOperationError, +) @pytest.mark.notimpl( ["trino"], reason="inserting maps into structs doesn't work", raises=TrinoUserError ) diff --git a/ibis/backends/tests/test_asof_join.py b/ibis/backends/tests/test_asof_join.py index ab728de18dbc..2c11d765d608 100644 --- a/ibis/backends/tests/test_asof_join.py +++ b/ibis/backends/tests/test_asof_join.py @@ -98,6 +98,7 @@ def 
time_keyed_right(time_keyed_df2): "sqlite", "risingwave", "flink", + "databricks", ] ) def test_asof_join(con, time_left, time_right, time_df1, time_df2, direction, op): @@ -127,6 +128,7 @@ def test_asof_join(con, time_left, time_right, time_df1, time_df2, direction, op @pytest.mark.notyet( [ "bigquery", + "databricks", "datafusion", "druid", "exasol", diff --git a/ibis/backends/tests/test_binary.py b/ibis/backends/tests/test_binary.py index 1d9f7cfa0516..1a134104edb3 100644 --- a/ibis/backends/tests/test_binary.py +++ b/ibis/backends/tests/test_binary.py @@ -17,6 +17,7 @@ "postgres": "bytea", "risingwave": "bytea", "flink": "BINARY(1) NOT NULL", + "databricks": "binary", } diff --git a/ibis/backends/tests/test_client.py b/ibis/backends/tests/test_client.py index 652592642911..9d03d080fa3c 100644 --- a/ibis/backends/tests/test_client.py +++ b/ibis/backends/tests/test_client.py @@ -26,6 +26,7 @@ import ibis.expr.operations as ops from ibis.backends.conftest import ALL_BACKENDS from ibis.backends.tests.errors import ( + DatabricksServerOperationError, ExaQueryError, ImpalaHiveServer2Error, OracleDatabaseError, @@ -116,7 +117,14 @@ def test_create_table(backend, con, temp_table, func, sch): marks=[ pytest.mark.notyet(["clickhouse"], reason="Can't specify both"), pytest.mark.notyet( - ["pyspark", "trino", "exasol", "risingwave", "impala"], + [ + "pyspark", + "trino", + "exasol", + "risingwave", + "impala", + "databricks", + ], reason="No support for temp tables", ), pytest.mark.notyet( @@ -142,7 +150,14 @@ def test_create_table(backend, con, temp_table, func, sch): id="temp, no overwrite", marks=[ pytest.mark.notyet( - ["pyspark", "trino", "exasol", "risingwave", "impala"], + [ + "pyspark", + "trino", + "exasol", + "risingwave", + "impala", + "databricks", + ], reason="No support for temp tables", ), pytest.mark.notimpl(["mssql"], reason="Incorrect temp table syntax"), @@ -295,7 +310,7 @@ def test_create_table_from_schema(con, new_schema, temp_table): raises=NotImplementedError, ) @pytest.mark.never( - ["risingwave"], + ["risingwave", "databricks"], raises=com.UnsupportedOperationError, reason="Feature is not yet implemented: CREATE TEMPORARY TABLE", ) @@ -362,7 +377,7 @@ def test_rename_table(con, temp_table, temp_table_orig): @mark.notimpl(["polars", "druid"]) -@mark.never(["impala", "pyspark"], reason="No non-nullable datatypes") +@mark.never(["impala", "pyspark", "databricks"], reason="No non-nullable datatypes") @pytest.mark.notimpl( ["flink"], raises=com.IbisError, @@ -634,6 +649,7 @@ def test_list_catalogs(con): "snowflake": {"IBIS_TESTING"}, "trino": {"memory"}, "pyspark": {"spark_catalog"}, + "databricks": {"hive_metastore", "ibis", "ibis_testing", "samples", "system"}, } result = set(con.list_catalogs()) assert test_catalogs[con.name] <= result @@ -663,6 +679,7 @@ def test_list_database_contents(con): "snowflake": {"IBIS_TESTING"}, "sqlite": {"main"}, "trino": {"default", "information_schema"}, + "databricks": {"default"}, } result = set(con.list_databases()) assert test_databases[con.name] <= result @@ -670,6 +687,7 @@ def test_list_database_contents(con): @pytest.mark.notyet(["mssql"], raises=PyODBCProgrammingError) @pytest.mark.notyet(["pyspark"], raises=com.IbisTypeError) +@pytest.mark.notyet(["databricks"], raises=DatabricksServerOperationError) @pytest.mark.notyet(["bigquery"], raises=com.UnsupportedBackendType) @pytest.mark.notyet( ["postgres"], raises=PsycoPg2UndefinedObject, reason="no unsigned int types" @@ -895,6 +913,7 @@ def test_self_join_memory_table(backend, con, 
monkeypatch): "snowflake", "sqlite", "trino", + "databricks", ] ) ], @@ -920,6 +939,7 @@ def test_self_join_memory_table(backend, con, monkeypatch): "snowflake", "sqlite", "trino", + "databricks", ] ) ], @@ -944,6 +964,7 @@ def test_self_join_memory_table(backend, con, monkeypatch): "snowflake", "sqlite", "trino", + "databricks", ], raises=com.UnsupportedOperationError, reason="we don't materialize datasets to avoid perf footguns", @@ -1288,6 +1309,7 @@ def test_set_backend_url(url, monkeypatch): "risingwave", "pyspark", "sqlite", + "databricks", ], reason="backend doesn't support timestamp with scale parameter", ) @@ -1456,6 +1478,9 @@ def test_close_connection(con): raises=TypeError, reason="snowflake uses a custom pyarrow extension type for JSON pretty printing", ) +@pytest.mark.notimpl( + ["databricks"], raises=json.JSONDecodeError, reason="not yet implemented" +) def test_json_to_pyarrow(con): t = con.tables.json_t table = t.to_pyarrow() @@ -1493,7 +1518,7 @@ def test_json_to_pyarrow(con): @pytest.mark.notyet( - ["risingwave", "exasol"], + ["risingwave", "exasol", "databricks"], raises=com.UnsupportedOperationError, reason="no temp table support", ) @@ -1659,6 +1684,12 @@ def test_cross_database_join(con_create_database, monkeypatch): @pytest.mark.notimpl( ["impala", "pyspark", "trino"], reason="Default constraints are not supported" ) +@pytest.mark.notimpl( + ["databricks"], + reason="Default constraints ARE supported, " + "but you have to enable them with a property AND set DEFAULT, so no", + raises=DatabricksServerOperationError, +) def test_insert_into_table_missing_columns(con, temp_table): db = getattr(con, "current_database", None) diff --git a/ibis/backends/tests/test_column.py b/ibis/backends/tests/test_column.py index c60b6d470654..76e92af92c65 100644 --- a/ibis/backends/tests/test_column.py +++ b/ibis/backends/tests/test_column.py @@ -23,6 +23,7 @@ "trino", "druid", "flink", + "databricks", ], raises=com.OperationNotDefinedError, ) diff --git a/ibis/backends/tests/test_examples.py b/ibis/backends/tests/test_examples.py index 0c8ab5e3a8fb..86cf2132873f 100644 --- a/ibis/backends/tests/test_examples.py +++ b/ibis/backends/tests/test_examples.py @@ -15,7 +15,7 @@ (LINUX or MACOS) and SANDBOXED, reason="nix on linux cannot download duckdb extensions or data due to sandboxing", ) -@pytest.mark.notimpl(["pyspark", "exasol"]) +@pytest.mark.notimpl(["pyspark", "exasol", "databricks"]) @pytest.mark.notyet(["clickhouse", "druid", "impala", "mssql", "trino", "risingwave"]) @pytest.mark.parametrize( ("example", "columns"), diff --git a/ibis/backends/tests/test_export.py b/ibis/backends/tests/test_export.py index 977243519862..6afaf56e3732 100644 --- a/ibis/backends/tests/test_export.py +++ b/ibis/backends/tests/test_export.py @@ -10,6 +10,7 @@ import ibis.expr.datatypes as dt from ibis import util from ibis.backends.tests.errors import ( + DatabricksServerOperationError, DuckDBNotImplementedException, DuckDBParserException, ExaQueryError, @@ -276,6 +277,7 @@ def test_table_to_parquet_writer_kwargs(version, tmp_path, backend, awards_playe "snowflake", "sqlite", "trino", + "databricks", ], reason="no partitioning support", ) @@ -384,6 +386,9 @@ def test_table_to_csv_writer_kwargs(delimiter, tmp_path, awards_players): reason="precision is out of range", ), pytest.mark.notyet(["exasol"], raises=ExaQueryError), + pytest.mark.notyet( + ["databricks"], raises=DatabricksServerOperationError + ), ], ), ], @@ -416,6 +421,7 @@ def test_to_pyarrow_decimal(backend, dtype, pyarrow_dtype): "trino", 
"exasol", "druid", + "databricks", # feels a bit weird given it's their format ¯\_(ツ)_/¯ ], raises=NotImplementedError, reason="read_delta not yet implemented", @@ -445,6 +451,9 @@ def test_roundtrip_delta(backend, con, alltypes, tmp_path, monkeypatch): raises=PyDruidProgrammingError, reason="Invalid SQL generated; druid doesn't know about TIMESTAMPTZ", ) +@pytest.mark.notimpl( + ["databricks"], raises=AssertionError, reason="Only the devil knows" +) def test_arrow_timestamp_with_time_zone(alltypes): from ibis.formats.pyarrow import PyArrowType diff --git a/ibis/backends/tests/test_expr_caching.py b/ibis/backends/tests/test_expr_caching.py index 03c73e3e9772..164ba8273e60 100644 --- a/ibis/backends/tests/test_expr_caching.py +++ b/ibis/backends/tests/test_expr_caching.py @@ -10,6 +10,13 @@ pa = pytest.importorskip("pyarrow") ds = pytest.importorskip("pyarrow.dataset") +pytestmark = [ + mark.notyet( + ["databricks"], + reason="Databricks does not support temporary tables, even though they allow the syntax", + ) +] + @mark.notimpl(["datafusion", "flink", "impala", "trino", "druid"]) @mark.notimpl(["exasol"], reason="Exasol does not support temporary tables") diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index 9e6c90cabf0d..c676745a89f2 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -50,6 +50,7 @@ "trino": "unknown", "postgres": "null", "risingwave": "null", + "databricks": "void", } @@ -90,6 +91,7 @@ def test_null_literal_typed(con, backend): "postgres": "boolean", "risingwave": "boolean", "flink": "BOOLEAN NOT NULL", + "databricks": "boolean", } @@ -204,6 +206,7 @@ def test_isna(backend, alltypes, col, value, filt): "oracle", "exasol", "pyspark", + "databricks", ], reason="NaN != NULL for these backends", ), @@ -794,6 +797,11 @@ def test_table_info_large(con): raises=com.OperationNotDefinedError, reason="quantile is not supported", ), + pytest.mark.notimpl( + ["databricks"], + raises=AssertionError, + reason="timestamp column is discarded", + ), pytest.mark.notimpl( [ "clickhouse", @@ -1585,6 +1593,7 @@ def test_hash(backend, alltypes, dtype): "pyspark", "risingwave", "sqlite", + "databricks", ] ) def test_hashbytes(backend, alltypes): @@ -1750,7 +1759,8 @@ def test_try_cast(con, from_val, to_type, expected): "int", marks=[ pytest.mark.never( - ["clickhouse", "pyspark", "flink"], reason="casts to 1672531200" + ["clickhouse", "pyspark", "flink", "databricks"], + reason="casts to 1672531200", ), pytest.mark.notyet(["bigquery"], raises=GoogleBadRequest), pytest.mark.notyet(["snowflake"], raises=SnowflakeProgrammingError), @@ -1813,7 +1823,7 @@ def test_try_cast_table(backend, con): pd.isna, marks=[ pytest.mark.notyet( - ["clickhouse", "polars", "flink", "pyspark"], + ["clickhouse", "polars", "flink", "pyspark", "databricks"], reason="casts this to to a number", ), pytest.mark.notyet(["bigquery"], raises=GoogleBadRequest), @@ -2039,7 +2049,10 @@ def test_static_table_slice(backend, slc, expected_count_fn): reason="impala doesn't support dynamic limit/offset", raises=ImpalaHiveServer2Error, ) -@pytest.mark.notyet(["pyspark"], reason="pyspark doesn't support dynamic limit/offset") +@pytest.mark.notyet( + ["pyspark", "databricks"], + reason="pyspark and databricks don't support dynamic limit/offset", +) @pytest.mark.notyet(["flink"], reason="flink doesn't support dynamic limit/offset") def test_dynamic_table_slice(backend, slc, expected_count_fn): t = backend.functional_alltypes @@ -2089,7 +2102,10 @@ def 
test_dynamic_table_slice(backend, slc, expected_count_fn): reason="impala doesn't support dynamic limit/offset", raises=ImpalaHiveServer2Error, ) -@pytest.mark.notyet(["pyspark"], reason="pyspark doesn't support dynamic limit/offset") +@pytest.mark.notyet( + ["pyspark", "databricks"], + reason="pyspark and databricks don't support dynamic limit/offset", +) @pytest.mark.notyet(["flink"], reason="flink doesn't support dynamic limit/offset") @pytest.mark.notyet( ["mssql"], @@ -2160,6 +2176,7 @@ def test_sample_memtable(con, backend): "trino", "exasol", "pyspark", + "databricks", ] ) def test_sample_with_seed(backend): diff --git a/ibis/backends/tests/test_join.py b/ibis/backends/tests/test_join.py index b81b1878a882..05e98733d3f5 100644 --- a/ibis/backends/tests/test_join.py +++ b/ibis/backends/tests/test_join.py @@ -386,6 +386,7 @@ def test_join_conflicting_columns(backend, con): "snowflake", "sqlite", "trino", + "databricks", ], reason="Users can implement this with ibis.row_number(): https://github.com/ibis-project/ibis/issues/9486", ) diff --git a/ibis/backends/tests/test_json.py b/ibis/backends/tests/test_json.py index d4d772beda26..d412666d1242 100644 --- a/ibis/backends/tests/test_json.py +++ b/ibis/backends/tests/test_json.py @@ -17,7 +17,9 @@ pytestmark = [ pytest.mark.never(["impala"], reason="doesn't support JSON and never will"), pytest.mark.notyet(["clickhouse"], reason="upstream is broken"), - pytest.mark.notimpl(["datafusion", "exasol", "mssql", "druid", "oracle"]), + pytest.mark.notimpl( + ["datafusion", "exasol", "mssql", "druid", "oracle", "databricks"] + ), ] diff --git a/ibis/backends/tests/test_map.py b/ibis/backends/tests/test_map.py index e8a80757cff9..cd55928dc773 100644 --- a/ibis/backends/tests/test_map.py +++ b/ibis/backends/tests/test_map.py @@ -237,6 +237,9 @@ def test_column_map_values(backend): @mark_notyet_datafusion +@pytest.mark.notyet( + ["databricks"], reason="says one thing, does something completely different" +) def test_column_map_merge(backend): table = backend.map expr = table.select( @@ -337,6 +340,9 @@ def test_map_column_contains_key_column(alltypes): @mark_notimpl_risingwave_hstore @mark_notyet_postgres @mark_notyet_datafusion +@pytest.mark.notyet( + ["databricks"], reason="says one thing, does something completely different" +) def test_literal_map_merge(con): a = ibis.literal({"a": 0, "b": 2}) b = ibis.literal({"a": 1, "c": 3}) diff --git a/ibis/backends/tests/test_network.py b/ibis/backends/tests/test_network.py index 8cc08b7dcb08..fbc22cd1b189 100644 --- a/ibis/backends/tests/test_network.py +++ b/ibis/backends/tests/test_network.py @@ -22,6 +22,7 @@ "postgres": "text", "risingwave": "text", "flink": "CHAR(17) NOT NULL", + "databricks": "string", } @@ -57,6 +58,7 @@ def test_macaddr_literal(con, backend): "mssql": "127.0.0.1", "datafusion": "127.0.0.1", "flink": "127.0.0.1", + "databricks": "127.0.0.1", }, { "bigquery": "STRING", @@ -69,6 +71,7 @@ def test_macaddr_literal(con, backend): "postgres": "text", "risingwave": "text", "flink": "CHAR(9) NOT NULL", + "databricks": "string", }, id="ipv4", ), @@ -89,6 +92,7 @@ def test_macaddr_literal(con, backend): "mssql": "2001:db8::1", "datafusion": "2001:db8::1", "flink": "2001:db8::1", + "databricks": "2001:db8::1", }, { "bigquery": "STRING", @@ -101,6 +105,7 @@ def test_macaddr_literal(con, backend): "postgres": "text", "risingwave": "text", "flink": "CHAR(11) NOT NULL", + "databricks": "string", }, id="ipv6", ), diff --git a/ibis/backends/tests/test_numeric.py 
b/ibis/backends/tests/test_numeric.py index 70bfa9273ac5..b802668329c8 100644 --- a/ibis/backends/tests/test_numeric.py +++ b/ibis/backends/tests/test_numeric.py @@ -14,6 +14,7 @@ from ibis import _ from ibis import literal as L from ibis.backends.tests.errors import ( + DatabricksServerOperationError, DuckDBParserException, ExaQueryError, GoogleBadRequest, @@ -54,6 +55,7 @@ "postgres": "integer", "risingwave": "integer", "flink": "INT NOT NULL", + "databricks": "int", }, id="int8", ), @@ -70,6 +72,7 @@ "postgres": "integer", "risingwave": "integer", "flink": "INT NOT NULL", + "databricks": "int", }, id="int16", ), @@ -86,6 +89,7 @@ "postgres": "integer", "risingwave": "integer", "flink": "INT NOT NULL", + "databricks": "int", }, id="int32", ), @@ -102,6 +106,7 @@ "postgres": "integer", "risingwave": "integer", "flink": "INT NOT NULL", + "databricks": "int", }, id="int64", ), @@ -118,6 +123,7 @@ "postgres": "integer", "risingwave": "integer", "flink": "INT NOT NULL", + "databricks": "int", }, id="uint8", ), @@ -134,6 +140,7 @@ "postgres": "integer", "risingwave": "integer", "flink": "INT NOT NULL", + "databricks": "int", }, id="uint16", ), @@ -150,6 +157,7 @@ "postgres": "integer", "risingwave": "integer", "flink": "INT NOT NULL", + "databricks": "int", }, id="uint32", ), @@ -166,6 +174,7 @@ "postgres": "integer", "risingwave": "integer", "flink": "INT NOT NULL", + "databricks": "int", }, id="uint64", ), @@ -182,6 +191,7 @@ "postgres": "numeric", "risingwave": "numeric", "flink": "DECIMAL(2, 1) NOT NULL", + "databricks": "decimal(2,1)", }, marks=[ pytest.mark.notimpl( @@ -205,6 +215,7 @@ "postgres": "numeric", "risingwave": "numeric", "flink": "DECIMAL(2, 1) NOT NULL", + "databricks": "decimal(2,1)", }, id="float32", ), @@ -221,6 +232,7 @@ "postgres": "numeric", "risingwave": "numeric", "flink": "DECIMAL(2, 1) NOT NULL", + "databricks": "decimal(2,1)", }, id="float64", ), @@ -259,6 +271,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "oracle": decimal.Decimal("1.1"), "flink": decimal.Decimal("1.1"), "polars": decimal.Decimal("1.1"), + "databricks": decimal.Decimal("1.1"), }, { "bigquery": "NUMERIC", @@ -271,6 +284,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "postgres": "numeric", "risingwave": "numeric", "flink": "DECIMAL(38, 18) NOT NULL", + "databricks": "decimal(38,18)", }, marks=[ pytest.mark.notimpl( @@ -302,6 +316,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "oracle": decimal.Decimal("1.1"), "flink": decimal.Decimal("1.1"), "polars": decimal.Decimal("1.1"), + "databricks": decimal.Decimal("1.1"), }, { "bigquery": "NUMERIC", @@ -314,6 +329,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "postgres": "numeric", "risingwave": "numeric", "flink": "DECIMAL(38, 9) NOT NULL", + "databricks": "decimal(38,9)", }, marks=[pytest.mark.notimpl(["exasol"], raises=ExaQueryError)], id="decimal-small", @@ -369,6 +385,11 @@ def test_numeric_literal(con, backend, expr, expected_types): ), pytest.mark.notyet(["mssql"], raises=PyODBCProgrammingError), pytest.mark.notyet(["polars"], raises=RuntimeError), + pytest.mark.notyet( + ["databricks"], + reason="Unsupported precision.", + raises=DatabricksServerOperationError, + ), ], id="decimal-big", ), @@ -383,12 +404,14 @@ def test_numeric_literal(con, backend, expr, expected_types): "pyspark": decimal.Decimal("Infinity"), "exasol": float("inf"), "duckdb": float("inf"), + "databricks": decimal.Decimal("Infinity"), }, { "sqlite": "real", "postgres": "numeric", "risingwave": 
"numeric", "duckdb": "FLOAT", + "databricks": "double", }, marks=[ pytest.mark.notyet( @@ -446,12 +469,14 @@ def test_numeric_literal(con, backend, expr, expected_types): "pyspark": decimal.Decimal("-Infinity"), "exasol": float("-inf"), "duckdb": float("-inf"), + "databricks": decimal.Decimal("-Infinity"), }, { "sqlite": "real", "postgres": "numeric", "risingwave": "numeric", "duckdb": "FLOAT", + "databricks": "double", }, marks=[ pytest.mark.notyet( @@ -510,6 +535,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "pyspark": decimal.Decimal("NaN"), "exasol": float("nan"), "duckdb": float("nan"), + "databricks": decimal.Decimal("NaN"), }, { "bigquery": "FLOAT64", @@ -518,6 +544,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "postgres": "numeric", "risingwave": "numeric", "duckdb": "FLOAT", + "databricks": "double", }, marks=[ pytest.mark.notyet( @@ -1277,6 +1304,7 @@ def test_floating_mod(backend, alltypes, df): ], ) @pytest.mark.notyet(["mysql", "pyspark"], raises=AssertionError) +@pytest.mark.notyet(["databricks"], raises=AssertionError, reason="returns NaNs") @pytest.mark.notyet( ["sqlite"], raises=AssertionError, reason="returns NULL when dividing by zero" ) diff --git a/ibis/backends/tests/test_register.py b/ibis/backends/tests/test_register.py index 0ed6925a2ce7..1ca96eb42221 100644 --- a/ibis/backends/tests/test_register.py +++ b/ibis/backends/tests/test_register.py @@ -25,6 +25,7 @@ pytest.mark.notyet( ["pyspark"], condition=IS_SPARK_REMOTE, raises=PySparkAnalysisException ), + pytest.mark.never(["databricks"], reason="no register method"), ] @@ -101,6 +102,7 @@ def gzip_csv(data_dir, tmp_path): "snowflake", "sqlite", "trino", + "databricks", ] ) def test_register_csv(con, data_dir, fname, in_table_name, out_table_name): @@ -128,6 +130,7 @@ def test_register_csv(con, data_dir, fname, in_table_name, out_table_name): "snowflake", "sqlite", "trino", + "databricks", ] ) def test_register_csv_gz(con, data_dir, gzip_csv): diff --git a/ibis/backends/tests/test_signatures.py b/ibis/backends/tests/test_signatures.py index d1065867c0bf..dbd7286731d7 100644 --- a/ibis/backends/tests/test_signatures.py +++ b/ibis/backends/tests/test_signatures.py @@ -56,7 +56,9 @@ def _scrape_methods(modules, params): "snowflake", "sqlite", "trino", - ] + "databricks", + ], + reason="SQL backends all have an additional `pretty` argument for formatting the generated SQL", ), ), "create_database": pytest.param( diff --git a/ibis/backends/tests/test_sql.py b/ibis/backends/tests/test_sql.py index 9f94744cc29d..f45619874e80 100644 --- a/ibis/backends/tests/test_sql.py +++ b/ibis/backends/tests/test_sql.py @@ -171,10 +171,13 @@ def test_union_aliasing(backend_name, snapshot): ), param( ibis.uuid(), - marks=pytest.mark.notimpl( - ["exasol", "risingwave", "druid", "oracle", "pyspark"], - raises=exc.OperationNotDefinedError, - ), + marks=[ + pytest.mark.notimpl( + ["exasol", "risingwave", "druid", "oracle"], + raises=exc.OperationNotDefinedError, + ), + pytest.mark.notimpl(["pyspark"], raises=exc.UnsupportedOperationError), + ], id="uuid", ), ], diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py index cb51c30aa273..827aea4a22b0 100644 --- a/ibis/backends/tests/test_string.py +++ b/ibis/backends/tests/test_string.py @@ -40,6 +40,7 @@ "postgres": "text", "risingwave": "text", "flink": "CHAR(6) NOT NULL", + "databricks": "string", }, id="string", ), @@ -56,6 +57,7 @@ "postgres": "text", "risingwave": "text", "flink": "CHAR(7) NOT NULL", + "databricks": 
"string", }, id="string-quote1", marks=[ @@ -84,6 +86,7 @@ "postgres": "text", "risingwave": "text", "flink": "CHAR(7) NOT NULL", + "databricks": "string", }, id="string-quote2", marks=[ @@ -440,6 +443,7 @@ def uses_java_re(t): "druid", "oracle", "exasol", + "databricks", ], raises=com.OperationNotDefinedError, ), @@ -467,6 +471,7 @@ def uses_java_re(t): "druid", "oracle", "exasol", + "databricks", ], raises=com.OperationNotDefinedError, ), @@ -776,6 +781,7 @@ def test_substr_with_null_values(backend, alltypes, df): "pyspark", "druid", "oracle", + "databricks", ], raises=com.OperationNotDefinedError, ) @@ -1038,12 +1044,12 @@ def string_temp_table(backend, con): ) temp_table_name = gen_name("strings") - temp = backend.name() not in ["exasol", "impala", "pyspark", "risingwave", "trino"] if backend.name() == "druid": - yield "I HATE DRUID" + pytest.xfail("druid doesn't support create table") else: - t = con.create_table(temp_table_name, better_strings, temp=temp) - yield t + yield con.create_table( + temp_table_name, better_strings, temp=backend.name() == "flink" or None + ) con.drop_table(temp_table_name, force=True) @@ -1189,6 +1195,7 @@ def string_temp_table(backend, con): "snowflake", "sqlite", "trino", + "databricks", ], raises=com.OperationNotDefinedError, ), @@ -1223,6 +1230,7 @@ def string_temp_table(backend, con): "snowflake", "sqlite", "trino", + "databricks", ], raises=com.OperationNotDefinedError, ), @@ -1234,7 +1242,7 @@ def string_temp_table(backend, con): id="lstrip", marks=[ pytest.mark.notyet( - ["pyspark"], + ["pyspark", "databricks"], raises=AssertionError, reason="Spark SQL LTRIM doesn't accept characters to trim", ), @@ -1246,7 +1254,7 @@ def string_temp_table(backend, con): id="rstrip", marks=[ pytest.mark.notyet( - ["pyspark"], + ["pyspark", "databricks"], raises=AssertionError, reason="Spark SQL RTRIM doesn't accept characters to trim", ), @@ -1338,14 +1346,12 @@ def string_temp_table_no_complications(backend, con): ) temp_table_name = gen_name("strings") - temp = backend.name() not in ["exasol", "impala", "pyspark", "risingwave", "trino"] - if backend.name() == "datafusion": - temp = None if backend.name() == "druid": - yield "I HATE DRUID" + pytest.xfail("druid doesn't support create table") else: - t = con.create_table(temp_table_name, better_strings, temp=temp) - yield t + yield con.create_table( + temp_table_name, better_strings, temp=backend.name() == "flink" or None + ) con.drop_table(temp_table_name, force=True) diff --git a/ibis/backends/tests/test_struct.py b/ibis/backends/tests/test_struct.py index 3098e349baca..164cd0ef83d2 100644 --- a/ibis/backends/tests/test_struct.py +++ b/ibis/backends/tests/test_struct.py @@ -10,6 +10,7 @@ import ibis.expr.datatypes as dt from ibis import util from ibis.backends.tests.errors import ( + DatabricksServerOperationError, PolarsColumnNotFoundError, PsycoPg2InternalError, PsycoPg2SyntaxError, @@ -201,6 +202,11 @@ def test_field_access_after_case(con): reason="snowflake doesn't have strongly typed structs", ) @pytest.mark.notyet(["datafusion"], raises=Exception, reason="unsupported syntax") +@pytest.mark.notyet( + ["databricks"], + raises=DatabricksServerOperationError, + reason="spaces are not allowed in column names", +) def test_keyword_fields(con, nullable): schema = ibis.schema( { diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index 9c08d7fe3245..8b772d37feab 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -119,7 +119,16 @@ def 
test_timestamp_extract(backend, alltypes, df, attr): reason="AttributeError: 'StringColumn' object has no attribute 'X'", ) @pytest.mark.notyet( - ["mysql", "sqlite", "mssql", "impala", "datafusion", "pyspark", "flink"], + [ + "mysql", + "sqlite", + "mssql", + "impala", + "datafusion", + "pyspark", + "flink", + "databricks", + ], raises=com.OperationNotDefinedError, reason="backend doesn't appear to support this operation directly", ) @@ -140,7 +149,16 @@ def test_extract_iso_year(backend, alltypes, df, transform): reason="AttributeError: 'StringColumn' object has no attribute 'X'", ) @pytest.mark.notyet( - ["mysql", "sqlite", "mssql", "impala", "datafusion", "pyspark", "flink"], + [ + "mysql", + "sqlite", + "mssql", + "impala", + "datafusion", + "pyspark", + "flink", + "databricks", + ], raises=com.OperationNotDefinedError, reason="backend doesn't appear to support this operation directly", ) @@ -206,7 +224,7 @@ def test_timestamp_extract_literal(con, func, expected): @pytest.mark.notimpl(["oracle", "druid"], raises=com.OperationNotDefinedError) @pytest.mark.notyet( - ["pyspark"], + ["pyspark", "databricks"], raises=com.UnsupportedOperationError, reason="PySpark backend does not support extracting microseconds.", ) @@ -355,6 +373,7 @@ def test_timestamp_extract_week_of_year(backend, alltypes, df): "datafusion", "exasol", "druid", + "databricks", ], raises=com.UnsupportedOperationError, ), @@ -751,6 +770,11 @@ def convert_to_offset(x): raises=PySparkConnectGrpcException, reason="arrow conversion breaks", ), + pytest.mark.notyet( + ["databricks"], + raises=AssertionError, + reason="apparent over/underflow", + ), pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError), pytest.mark.notimpl( ["duckdb"], @@ -806,6 +830,11 @@ def convert_to_offset(x): raises=PySparkConnectGrpcException, reason="arrow conversion breaks", ), + pytest.mark.notyet( + ["databricks"], + raises=AssertionError, + reason="apparent over/underflow", + ), ], ), ], @@ -1042,7 +1071,7 @@ def test_strftime(backend, alltypes, df, expr_fn, pandas_pattern): "ms", marks=[ pytest.mark.notimpl( - ["pyspark"], + ["pyspark", "databricks"], raises=com.UnsupportedArgumentError, reason="PySpark backend does not support timestamp from unix time with unit ms. Supported unit is s.", ), @@ -1057,7 +1086,7 @@ def test_strftime(backend, alltypes, df, expr_fn, pandas_pattern): "us", marks=[ pytest.mark.notimpl( - ["pyspark"], + ["pyspark", "databricks"], raises=com.UnsupportedArgumentError, reason="PySpark backend does not support timestamp from unix time with unit us. Supported unit is s.", ), @@ -1078,7 +1107,7 @@ def test_strftime(backend, alltypes, df, expr_fn, pandas_pattern): "ns", marks=[ pytest.mark.notimpl( - ["pyspark"], + ["pyspark", "databricks"], raises=com.UnsupportedArgumentError, reason="PySpark backend does not support timestamp from unix time with unit ms. 
Supported unit is s.", ), @@ -1381,6 +1410,7 @@ def test_today_from_projection(alltypes): "sqlite": "text", "trino": "date", "risingwave": "date", + "databricks": "date", } @@ -1406,11 +1436,12 @@ def test_date_literal(con, backend): "postgres": "timestamp without time zone", "risingwave": "timestamp without time zone", "flink": "TIMESTAMP(6) NOT NULL", + "databricks": "timestamp", } @pytest.mark.notimpl( - ["pyspark", "mysql", "exasol", "oracle"], + ["pyspark", "mysql", "exasol", "oracle", "databricks"], raises=com.OperationNotDefinedError, ) @pytest.mark.notyet(["impala"], raises=com.OperationNotDefinedError) @@ -1427,7 +1458,7 @@ def test_timestamp_literal(con, backend): @pytest.mark.notimpl( - ["mysql", "pyspark", "exasol"], raises=com.OperationNotDefinedError + ["mysql", "pyspark", "exasol", "databricks"], raises=com.OperationNotDefinedError ) @pytest.mark.notyet(["impala", "oracle"], raises=com.OperationNotDefinedError) @pytest.mark.parametrize( @@ -1487,7 +1518,7 @@ def test_timestamp_with_timezone_literal(con, timezone, expected): @pytest.mark.notimpl( - ["datafusion", "pyspark", "polars", "mysql", "oracle"], + ["datafusion", "pyspark", "polars", "mysql", "oracle", "databricks"], raises=com.OperationNotDefinedError, ) @pytest.mark.notyet( @@ -1540,6 +1571,11 @@ def test_time_literal(con, backend): ids=["second", "subsecond"], ) @pytest.mark.notimpl(["exasol"], raises=ExaQueryError) +@pytest.mark.notimpl( + ["databricks"], + raises=AssertionError, + reason="returns a timedelta instead of a time", +) def test_extract_time_from_timestamp(con, microsecond): raw_ts = datetime.datetime(2023, 1, 7, 13, 20, 5, microsecond) ts = ibis.timestamp(raw_ts) @@ -1580,7 +1616,7 @@ def test_extract_time_from_timestamp(con, microsecond): ) @pytest.mark.notimpl( ["bigquery", "duckdb"], - reason="BigQuery returns DateOffset arrays", + reason="backend returns DateOffset arrays", raises=AssertionError, ) @pytest.mark.notyet( @@ -1602,6 +1638,11 @@ def test_extract_time_from_timestamp(con, microsecond): ), ) @pytest.mark.notyet(["mssql"], raises=PyODBCProgrammingError) +@pytest.mark.notimpl( + ["databricks"], + reason="returns a different string format than expected in the test", + raises=AssertionError, +) def test_interval_literal(con, backend): expr = ibis.interval(1, unit="s") result = con.execute(expr) @@ -1624,7 +1665,7 @@ def test_date_column_from_ymd(backend, con, alltypes, df): @pytest.mark.notimpl( - ["pyspark", "mysql", "exasol"], raises=com.OperationNotDefinedError + ["pyspark", "mysql", "exasol", "databricks"], raises=com.OperationNotDefinedError ) @pytest.mark.notyet(["impala", "oracle"], raises=com.OperationNotDefinedError) def test_timestamp_column_from_ymdhms(backend, con, alltypes, df): @@ -1708,6 +1749,11 @@ def test_integer_cast_to_timestamp_scalar(alltypes, df): condition=not IS_SPARK_REMOTE, raises=pd.errors.OutOfBoundsDatetime, ) +@pytest.mark.notyet( + ["databricks"], + reason="returns a value with a timezone, which the test doesn't expect", + raises=AssertionError, +) @pytest.mark.notimpl(["flink"], raises=ArrowInvalid) @pytest.mark.notyet( ["polars"], raises=AssertionError, reason="produces an incorrect result" @@ -1814,7 +1860,7 @@ def test_large_timestamp(con): id="ns", marks=[ pytest.mark.notyet( - ["impala", "pyspark", "trino"], + ["impala", "pyspark", "trino", "databricks"], reason="drivers appear to truncate nanos", raises=AssertionError, ), @@ -1915,9 +1961,9 @@ def test_timestamp_precision_output(con, ts, scale, unit): id="timestamp", marks=[ pytest.mark.notimpl( - 
["pyspark"], + ["pyspark", "databricks"], raises=AssertionError, - reason="pyspark difference is timezone aware", + reason="backend computes timezone aware difference", ), pytest.mark.notimpl( ["mysql"], @@ -1938,7 +1984,7 @@ def test_delta(con, start, end, unit, expected): @pytest.mark.notimpl( - ["impala", "mysql", "pyspark", "sqlite", "trino", "druid"], + ["impala", "mysql", "pyspark", "sqlite", "trino", "druid", "databricks"], raises=com.OperationNotDefinedError, ) @pytest.mark.parametrize( @@ -2040,7 +2086,17 @@ def test_timestamp_bucket(backend, kws, pd_freq): @pytest.mark.notimpl( - ["datafusion", "impala", "mysql", "oracle", "pyspark", "sqlite", "trino", "druid"], + [ + "datafusion", + "impala", + "mysql", + "oracle", + "pyspark", + "sqlite", + "trino", + "druid", + "databricks", + ], raises=com.OperationNotDefinedError, ) @pytest.mark.notimpl( diff --git a/ibis/backends/tests/test_udf.py b/ibis/backends/tests/test_udf.py index f27397dc256d..fd5980d0081a 100644 --- a/ibis/backends/tests/test_udf.py +++ b/ibis/backends/tests/test_udf.py @@ -21,6 +21,7 @@ "oracle", "trino", "risingwave", + "databricks", ] ) cloudpickle_version_mismatch = mark.notimpl( diff --git a/ibis/backends/tests/test_uuid.py b/ibis/backends/tests/test_uuid.py index 85e72db454a9..15f59f1bdbd3 100644 --- a/ibis/backends/tests/test_uuid.py +++ b/ibis/backends/tests/test_uuid.py @@ -25,6 +25,7 @@ "snowflake": "VARCHAR", "sqlite": "text", "trino": "uuid", + "databricks": "string", } @@ -42,9 +43,10 @@ def test_uuid_literal(con, backend): @pytest.mark.notimpl( - ["druid", "exasol", "oracle", "polars", "pyspark", "risingwave"], + ["druid", "exasol", "oracle", "polars", "risingwave"], raises=com.OperationNotDefinedError, ) +@pytest.mark.notimpl(["pyspark"], raises=com.UnsupportedOperationError) @pytest.mark.never( ["mysql"], raises=AssertionError, reason="MySQL generates version 1 UUIDs" ) @@ -55,9 +57,10 @@ def test_uuid_function(con): @pytest.mark.notimpl( - ["druid", "exasol", "oracle", "polars", "pyspark", "risingwave"], + ["druid", "exasol", "oracle", "polars", "risingwave"], raises=com.OperationNotDefinedError, ) +@pytest.mark.notimpl(["pyspark"], raises=com.UnsupportedOperationError) def test_uuid_unique_each_row(con): expr = ( con.tables.functional_alltypes.mutate(uuid=ibis.uuid()).limit(2).uuid.nunique() diff --git a/ibis/backends/tests/test_window.py b/ibis/backends/tests/test_window.py index c57bacb06698..831b7a157b23 100644 --- a/ibis/backends/tests/test_window.py +++ b/ibis/backends/tests/test_window.py @@ -367,6 +367,7 @@ def test_grouped_bounded_expanding_window( "datafusion", "trino", "exasol", + "databricks", ], raises=com.OperationNotDefinedError, ), @@ -555,6 +556,7 @@ def test_grouped_bounded_preceding_window( "trino", "datafusion", "exasol", + "databricks", ], raises=com.OperationNotDefinedError, ), @@ -719,6 +721,7 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): "trino", "datafusion", "exasol", + "databricks", ], raises=com.OperationNotDefinedError, ), @@ -753,6 +756,7 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): "datafusion", "exasol", "flink", + "databricks", ], raises=com.OperationNotDefinedError, ), @@ -870,6 +874,7 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): "trino", "datafusion", "exasol", + "databricks", ], raises=com.OperationNotDefinedError, ), @@ -899,6 +904,7 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): "datafusion", "exasol", "flink", + "databricks", ], raises=com.OperationNotDefinedError, ), diff 
--git a/ibis/tests/expr/mocks.py b/ibis/tests/expr/mocks.py index 54638f706d90..2d7bed3b85af 100644 --- a/ibis/tests/expr/mocks.py +++ b/ibis/tests/expr/mocks.py @@ -42,6 +42,12 @@ def do_connect(self): def disconnect(self): pass + def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: + pass + + def _finalize_memtable(self, name: str) -> None: + pass + def table(self, name, **kwargs): schema = self.get_schema(name) node = ops.DatabaseTable(source=self, name=name, schema=schema) diff --git a/poetry.lock b/poetry.lock index 35be9572125e..8500c4b2a9ad 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1339,6 +1339,35 @@ files = [ docs = ["ipython", "matplotlib", "numpydoc", "sphinx"] tests = ["pytest", "pytest-cov", "pytest-xdist"] +[[package]] +name = "databricks-sql-connector" +version = "3.4.0" +description = "Databricks SQL Connector for Python" +optional = true +python-versions = "<4.0.0,>=3.8.0" +files = [ + {file = "databricks_sql_connector-3.4.0-py3-none-any.whl", hash = "sha256:7ba2efa4149529dee418ec467bacff1cb34c321a43e597d41fd020e569cbba3f"}, + {file = "databricks_sql_connector-3.4.0.tar.gz", hash = "sha256:5def7762a398e025db6a5740649f3ea856f07dc04a87cb7818af335f4157c030"}, +] + +[package.dependencies] +lz4 = ">=4.0.2,<5.0.0" +numpy = [ + {version = ">=1.23.4,<2.0.0", markers = "python_version >= \"3.11\""}, + {version = ">=1.16.6,<2.0.0", markers = "python_version >= \"3.8\" and python_version < \"3.11\""}, +] +oauthlib = ">=3.1.0,<4.0.0" +openpyxl = ">=3.0.10,<4.0.0" +pandas = {version = ">=1.2.5,<2.3.0", markers = "python_version >= \"3.8\""} +pyarrow = ">=14.0.1,<17" +requests = ">=2.18.1,<3.0.0" +thrift = ">=0.16.0,<0.21.0" +urllib3 = ">=1.26" + +[package.extras] +alembic = ["alembic (>=1.0.11,<2.0.0)", "sqlalchemy (>=2.0.21)"] +sqlalchemy = ["sqlalchemy (>=2.0.21)"] + [[package]] name = "datafusion" version = "41.0.0" @@ -1642,6 +1671,17 @@ files = [ [package.dependencies] packaging = ">=20.9" +[[package]] +name = "et-xmlfile" +version = "1.1.0" +description = "An implementation of lxml.xmlfile for the standard library" +optional = true +python-versions = ">=3.6" +files = [ + {file = "et_xmlfile-1.1.0-py3-none-any.whl", hash = "sha256:a2ba85d1d6a74ef63837eed693bcb89c3f752169b0e3e7ae5b16ca5e1b3deada"}, + {file = "et_xmlfile-1.1.0.tar.gz", hash = "sha256:8eb9e2bc2f8c97e37a2dc85a09ecdcdec9d8a396530a6d5a33b30b9a92da0c5c"}, +] + [[package]] name = "exceptiongroup" version = "1.2.2" @@ -3998,64 +4038,47 @@ test = ["pytest", "pytest-console-scripts", "pytest-jupyter", "pytest-tornasync" [[package]] name = "numpy" -version = "2.1.1" +version = "1.26.4" description = "Fundamental package for array computing in Python" optional = false -python-versions = ">=3.10" +python-versions = ">=3.9" files = [ - {file = "numpy-2.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c8a0e34993b510fc19b9a2ce7f31cb8e94ecf6e924a40c0c9dd4f62d0aac47d9"}, - {file = "numpy-2.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7dd86dfaf7c900c0bbdcb8b16e2f6ddf1eb1fe39c6c8cca6e94844ed3152a8fd"}, - {file = "numpy-2.1.1-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:5889dd24f03ca5a5b1e8a90a33b5a0846d8977565e4ae003a63d22ecddf6782f"}, - {file = "numpy-2.1.1-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:59ca673ad11d4b84ceb385290ed0ebe60266e356641428c845b39cd9df6713ab"}, - {file = "numpy-2.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:13ce49a34c44b6de5241f0b38b07e44c1b2dcacd9e36c30f9c2fcb1bb5135db7"}, - {file = 
"numpy-2.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:913cc1d311060b1d409e609947fa1b9753701dac96e6581b58afc36b7ee35af6"}, - {file = "numpy-2.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:caf5d284ddea7462c32b8d4a6b8af030b6c9fd5332afb70e7414d7fdded4bfd0"}, - {file = "numpy-2.1.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:57eb525e7c2a8fdee02d731f647146ff54ea8c973364f3b850069ffb42799647"}, - {file = "numpy-2.1.1-cp310-cp310-win32.whl", hash = "sha256:9a8e06c7a980869ea67bbf551283bbed2856915f0a792dc32dd0f9dd2fb56728"}, - {file = "numpy-2.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:d10c39947a2d351d6d466b4ae83dad4c37cd6c3cdd6d5d0fa797da56f710a6ae"}, - {file = "numpy-2.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0d07841fd284718feffe7dd17a63a2e6c78679b2d386d3e82f44f0108c905550"}, - {file = "numpy-2.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b5613cfeb1adfe791e8e681128f5f49f22f3fcaa942255a6124d58ca59d9528f"}, - {file = "numpy-2.1.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:0b8cc2715a84b7c3b161f9ebbd942740aaed913584cae9cdc7f8ad5ad41943d0"}, - {file = "numpy-2.1.1-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:b49742cdb85f1f81e4dc1b39dcf328244f4d8d1ded95dea725b316bd2cf18c95"}, - {file = "numpy-2.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8d5f8a8e3bc87334f025194c6193e408903d21ebaeb10952264943a985066ca"}, - {file = "numpy-2.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d51fc141ddbe3f919e91a096ec739f49d686df8af254b2053ba21a910ae518bf"}, - {file = "numpy-2.1.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:98ce7fb5b8063cfdd86596b9c762bf2b5e35a2cdd7e967494ab78a1fa7f8b86e"}, - {file = "numpy-2.1.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:24c2ad697bd8593887b019817ddd9974a7f429c14a5469d7fad413f28340a6d2"}, - {file = "numpy-2.1.1-cp311-cp311-win32.whl", hash = "sha256:397bc5ce62d3fb73f304bec332171535c187e0643e176a6e9421a6e3eacef06d"}, - {file = "numpy-2.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:ae8ce252404cdd4de56dcfce8b11eac3c594a9c16c231d081fb705cf23bd4d9e"}, - {file = "numpy-2.1.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:7c803b7934a7f59563db459292e6aa078bb38b7ab1446ca38dd138646a38203e"}, - {file = "numpy-2.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6435c48250c12f001920f0751fe50c0348f5f240852cfddc5e2f97e007544cbe"}, - {file = "numpy-2.1.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:3269c9eb8745e8d975980b3a7411a98976824e1fdef11f0aacf76147f662b15f"}, - {file = "numpy-2.1.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:fac6e277a41163d27dfab5f4ec1f7a83fac94e170665a4a50191b545721c6521"}, - {file = "numpy-2.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fcd8f556cdc8cfe35e70efb92463082b7f43dd7e547eb071ffc36abc0ca4699b"}, - {file = "numpy-2.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b9cd92c8f8e7b313b80e93cedc12c0112088541dcedd9197b5dee3738c1201"}, - {file = "numpy-2.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:afd9c680df4de71cd58582b51e88a61feed4abcc7530bcd3d48483f20fc76f2a"}, - {file = "numpy-2.1.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8661c94e3aad18e1ea17a11f60f843a4933ccaf1a25a7c6a9182af70610b2313"}, - {file = "numpy-2.1.1-cp312-cp312-win32.whl", hash = "sha256:950802d17a33c07cba7fd7c3dcfa7d64705509206be1606f196d179e539111ed"}, - {file = "numpy-2.1.1-cp312-cp312-win_amd64.whl", 
hash = "sha256:3fc5eabfc720db95d68e6646e88f8b399bfedd235994016351b1d9e062c4b270"}, - {file = "numpy-2.1.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:046356b19d7ad1890c751b99acad5e82dc4a02232013bd9a9a712fddf8eb60f5"}, - {file = "numpy-2.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6e5a9cb2be39350ae6c8f79410744e80154df658d5bea06e06e0ac5bb75480d5"}, - {file = "numpy-2.1.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:d4c57b68c8ef5e1ebf47238e99bf27657511ec3f071c465f6b1bccbef12d4136"}, - {file = "numpy-2.1.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:8ae0fd135e0b157365ac7cc31fff27f07a5572bdfc38f9c2d43b2aff416cc8b0"}, - {file = "numpy-2.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:981707f6b31b59c0c24bcda52e5605f9701cb46da4b86c2e8023656ad3e833cb"}, - {file = "numpy-2.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ca4b53e1e0b279142113b8c5eb7d7a877e967c306edc34f3b58e9be12fda8df"}, - {file = "numpy-2.1.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:e097507396c0be4e547ff15b13dc3866f45f3680f789c1a1301b07dadd3fbc78"}, - {file = "numpy-2.1.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7506387e191fe8cdb267f912469a3cccc538ab108471291636a96a54e599556"}, - {file = "numpy-2.1.1-cp313-cp313-win32.whl", hash = "sha256:251105b7c42abe40e3a689881e1793370cc9724ad50d64b30b358bbb3a97553b"}, - {file = "numpy-2.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:f212d4f46b67ff604d11fff7cc62d36b3e8714edf68e44e9760e19be38c03eb0"}, - {file = "numpy-2.1.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:920b0911bb2e4414c50e55bd658baeb78281a47feeb064ab40c2b66ecba85553"}, - {file = "numpy-2.1.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:bab7c09454460a487e631ffc0c42057e3d8f2a9ddccd1e60c7bb8ed774992480"}, - {file = "numpy-2.1.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:cea427d1350f3fd0d2818ce7350095c1a2ee33e30961d2f0fef48576ddbbe90f"}, - {file = "numpy-2.1.1-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:e30356d530528a42eeba51420ae8bf6c6c09559051887196599d96ee5f536468"}, - {file = "numpy-2.1.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8dfa9e94fc127c40979c3eacbae1e61fda4fe71d84869cc129e2721973231ef"}, - {file = "numpy-2.1.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:910b47a6d0635ec1bd53b88f86120a52bf56dcc27b51f18c7b4a2e2224c29f0f"}, - {file = "numpy-2.1.1-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:13cc11c00000848702322af4de0147ced365c81d66053a67c2e962a485b3717c"}, - {file = "numpy-2.1.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:53e27293b3a2b661c03f79aa51c3987492bd4641ef933e366e0f9f6c9bf257ec"}, - {file = "numpy-2.1.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7be6a07520b88214ea85d8ac8b7d6d8a1839b0b5cb87412ac9f49fa934eb15d5"}, - {file = "numpy-2.1.1-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:52ac2e48f5ad847cd43c4755520a2317f3380213493b9d8a4c5e37f3b87df504"}, - {file = "numpy-2.1.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50a95ca3560a6058d6ea91d4629a83a897ee27c00630aed9d933dff191f170cd"}, - {file = "numpy-2.1.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:99f4a9ee60eed1385a86e82288971a51e71df052ed0b2900ed30bc840c0f2e39"}, - {file = "numpy-2.1.1.tar.gz", hash = "sha256:d0cf7d55b1051387807405b3898efafa862997b4cba8aa5dbe657be794afeafd"}, + {file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"}, + {file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"}, + {file = "numpy-1.26.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d209d8969599b27ad20994c8e41936ee0964e6da07478d6c35016bc386b66ad4"}, + {file = "numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f"}, + {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:62b8e4b1e28009ef2846b4c7852046736bab361f7aeadeb6a5b89ebec3c7055a"}, + {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a4abb4f9001ad2858e7ac189089c42178fcce737e4169dc61321660f1a96c7d2"}, + {file = "numpy-1.26.4-cp310-cp310-win32.whl", hash = "sha256:bfe25acf8b437eb2a8b2d49d443800a5f18508cd811fea3181723922a8a82b07"}, + {file = "numpy-1.26.4-cp310-cp310-win_amd64.whl", hash = "sha256:b97fe8060236edf3662adfc2c633f56a08ae30560c56310562cb4f95500022d5"}, + {file = "numpy-1.26.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71"}, + {file = "numpy-1.26.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef"}, + {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e"}, + {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5"}, + {file = "numpy-1.26.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a"}, + {file = "numpy-1.26.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a"}, + {file = "numpy-1.26.4-cp311-cp311-win32.whl", hash = "sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20"}, + {file = "numpy-1.26.4-cp311-cp311-win_amd64.whl", hash = "sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2"}, + {file = "numpy-1.26.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218"}, + {file = "numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b"}, + {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b"}, + {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed"}, + {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a"}, + {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0"}, + {file = "numpy-1.26.4-cp312-cp312-win32.whl", hash = "sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110"}, + {file = "numpy-1.26.4-cp312-cp312-win_amd64.whl", hash = "sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818"}, + {file = "numpy-1.26.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:7349ab0fa0c429c82442a27a9673fc802ffdb7c7775fad780226cb234965e53c"}, + {file = "numpy-1.26.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:52b8b60467cd7dd1e9ed082188b4e6bb35aa5cdd01777621a1658910745b90be"}, + {file = "numpy-1.26.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5241e0a80d808d70546c697135da2c613f30e28251ff8307eb72ba696945764"}, + {file = "numpy-1.26.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3"}, + {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:679b0076f67ecc0138fd2ede3a8fd196dddc2ad3254069bcb9faf9a79b1cebcd"}, + {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:47711010ad8555514b434df65f7d7b076bb8261df1ca9bb78f53d3b2db02e95c"}, + {file = "numpy-1.26.4-cp39-cp39-win32.whl", hash = "sha256:a354325ee03388678242a4d7ebcd08b5c727033fcff3b2f536aea978e15ee9e6"}, + {file = "numpy-1.26.4-cp39-cp39-win_amd64.whl", hash = "sha256:3373d5d70a5fe74a2c1bb6d2cfd9609ecf686d47a2d7b1d37a8f3b6bf6003aea"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:afedb719a9dcfc7eaf2287b839d8198e06dcd4cb5d276a3df279231138e83d30"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95a7476c59002f2f6c590b9b7b998306fba6a5aa646b1e22ddfeaf8f78c3a29c"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7e50d0a0cc3189f9cb0aeb3a6a6af18c16f59f004b866cd2be1c14b36134a4a0"}, + {file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"}, ] [[package]] @@ -4074,6 +4097,20 @@ rsa = ["cryptography (>=3.0.0)"] signals = ["blinker (>=1.4.0)"] signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] +[[package]] +name = "openpyxl" +version = "3.1.5" +description = "A Python library to read/write Excel 2010 xlsx/xlsm files" +optional = true +python-versions = ">=3.8" +files = [ + {file = "openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2"}, + {file = "openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050"}, +] + +[package.dependencies] +et-xmlfile = "*" + [[package]] name = "oracledb" version = "2.4.1" @@ -4922,55 +4959,52 @@ files = [ [[package]] name = "pyarrow" -version = "17.0.0" +version = "16.1.0" description = "Python library for Apache Arrow" optional = false python-versions = ">=3.8" files = [ - {file = "pyarrow-17.0.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:a5c8b238d47e48812ee577ee20c9a2779e6a5904f1708ae240f53ecbee7c9f07"}, - {file = "pyarrow-17.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:db023dc4c6cae1015de9e198d41250688383c3f9af8f565370ab2b4cb5f62655"}, - {file = "pyarrow-17.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da1e060b3876faa11cee287839f9cc7cdc00649f475714b8680a05fd9071d545"}, - {file = "pyarrow-17.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75c06d4624c0ad6674364bb46ef38c3132768139ddec1c56582dbac54f2663e2"}, - {file = "pyarrow-17.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:fa3c246cc58cb5a4a5cb407a18f193354ea47dd0648194e6265bd24177982fe8"}, - {file = "pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:f7ae2de664e0b158d1607699a16a488de3d008ba99b3a7aa5de1cbc13574d047"}, - {file = 
"pyarrow-17.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:5984f416552eea15fd9cee03da53542bf4cddaef5afecefb9aa8d1010c335087"}, - {file = "pyarrow-17.0.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:1c8856e2ef09eb87ecf937104aacfa0708f22dfeb039c363ec99735190ffb977"}, - {file = "pyarrow-17.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2e19f569567efcbbd42084e87f948778eb371d308e137a0f97afe19bb860ccb3"}, - {file = "pyarrow-17.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b244dc8e08a23b3e352899a006a26ae7b4d0da7bb636872fa8f5884e70acf15"}, - {file = "pyarrow-17.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b72e87fe3e1db343995562f7fff8aee354b55ee83d13afba65400c178ab2597"}, - {file = "pyarrow-17.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:dc5c31c37409dfbc5d014047817cb4ccd8c1ea25d19576acf1a001fe07f5b420"}, - {file = "pyarrow-17.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:e3343cb1e88bc2ea605986d4b94948716edc7a8d14afd4e2c097232f729758b4"}, - {file = "pyarrow-17.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:a27532c38f3de9eb3e90ecab63dfda948a8ca859a66e3a47f5f42d1e403c4d03"}, - {file = "pyarrow-17.0.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:9b8a823cea605221e61f34859dcc03207e52e409ccf6354634143e23af7c8d22"}, - {file = "pyarrow-17.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f1e70de6cb5790a50b01d2b686d54aaf73da01266850b05e3af2a1bc89e16053"}, - {file = "pyarrow-17.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0071ce35788c6f9077ff9ecba4858108eebe2ea5a3f7cf2cf55ebc1dbc6ee24a"}, - {file = "pyarrow-17.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:757074882f844411fcca735e39aae74248a1531367a7c80799b4266390ae51cc"}, - {file = "pyarrow-17.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:9ba11c4f16976e89146781a83833df7f82077cdab7dc6232c897789343f7891a"}, - {file = "pyarrow-17.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b0c6ac301093b42d34410b187bba560b17c0330f64907bfa4f7f7f2444b0cf9b"}, - {file = "pyarrow-17.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:392bc9feabc647338e6c89267635e111d71edad5fcffba204425a7c8d13610d7"}, - {file = "pyarrow-17.0.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:af5ff82a04b2171415f1410cff7ebb79861afc5dae50be73ce06d6e870615204"}, - {file = "pyarrow-17.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:edca18eaca89cd6382dfbcff3dd2d87633433043650c07375d095cd3517561d8"}, - {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7c7916bff914ac5d4a8fe25b7a25e432ff921e72f6f2b7547d1e325c1ad9d155"}, - {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f553ca691b9e94b202ff741bdd40f6ccb70cdd5fbf65c187af132f1317de6145"}, - {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:0cdb0e627c86c373205a2f94a510ac4376fdc523f8bb36beab2e7f204416163c"}, - {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:d7d192305d9d8bc9082d10f361fc70a73590a4c65cf31c3e6926cd72b76bc35c"}, - {file = "pyarrow-17.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:02dae06ce212d8b3244dd3e7d12d9c4d3046945a5933d28026598e9dbbda1fca"}, - {file = "pyarrow-17.0.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:13d7a460b412f31e4c0efa1148e1d29bdf18ad1411eb6757d38f8fbdcc8645fb"}, - {file = "pyarrow-17.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:9b564a51fbccfab5a04a80453e5ac6c9954a9c5ef2890d1bcf63741909c3f8df"}, - {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32503827abbc5aadedfa235f5ece8c4f8f8b0a3cf01066bc8d29de7539532687"}, - {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a155acc7f154b9ffcc85497509bcd0d43efb80d6f733b0dc3bb14e281f131c8b"}, - {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:dec8d129254d0188a49f8a1fc99e0560dc1b85f60af729f47de4046015f9b0a5"}, - {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:a48ddf5c3c6a6c505904545c25a4ae13646ae1f8ba703c4df4a1bfe4f4006bda"}, - {file = "pyarrow-17.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:42bf93249a083aca230ba7e2786c5f673507fa97bbd9725a1e2754715151a204"}, - {file = "pyarrow-17.0.0.tar.gz", hash = "sha256:4beca9521ed2c0921c1023e68d097d0299b62c362639ea315572a58f3f50fd28"}, + {file = "pyarrow-16.1.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:17e23b9a65a70cc733d8b738baa6ad3722298fa0c81d88f63ff94bf25eaa77b9"}, + {file = "pyarrow-16.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4740cc41e2ba5d641071d0ab5e9ef9b5e6e8c7611351a5cb7c1d175eaf43674a"}, + {file = "pyarrow-16.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:98100e0268d04e0eec47b73f20b39c45b4006f3c4233719c3848aa27a03c1aef"}, + {file = "pyarrow-16.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f68f409e7b283c085f2da014f9ef81e885d90dcd733bd648cfba3ef265961848"}, + {file = "pyarrow-16.1.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:a8914cd176f448e09746037b0c6b3a9d7688cef451ec5735094055116857580c"}, + {file = "pyarrow-16.1.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:48be160782c0556156d91adbdd5a4a7e719f8d407cb46ae3bb4eaee09b3111bd"}, + {file = "pyarrow-16.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:9cf389d444b0f41d9fe1444b70650fea31e9d52cfcb5f818b7888b91b586efff"}, + {file = "pyarrow-16.1.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:d0ebea336b535b37eee9eee31761813086d33ed06de9ab6fc6aaa0bace7b250c"}, + {file = "pyarrow-16.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2e73cfc4a99e796727919c5541c65bb88b973377501e39b9842ea71401ca6c1c"}, + {file = "pyarrow-16.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf9251264247ecfe93e5f5a0cd43b8ae834f1e61d1abca22da55b20c788417f6"}, + {file = "pyarrow-16.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ddf5aace92d520d3d2a20031d8b0ec27b4395cab9f74e07cc95edf42a5cc0147"}, + {file = "pyarrow-16.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:25233642583bf658f629eb230b9bb79d9af4d9f9229890b3c878699c82f7d11e"}, + {file = "pyarrow-16.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:a33a64576fddfbec0a44112eaf844c20853647ca833e9a647bfae0582b2ff94b"}, + {file = "pyarrow-16.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:185d121b50836379fe012753cf15c4ba9638bda9645183ab36246923875f8d1b"}, + {file = "pyarrow-16.1.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:2e51ca1d6ed7f2e9d5c3c83decf27b0d17bb207a7dea986e8dc3e24f80ff7d6f"}, + {file = "pyarrow-16.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:06ebccb6f8cb7357de85f60d5da50e83507954af617d7b05f48af1621d331c9a"}, + {file = "pyarrow-16.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:b04707f1979815f5e49824ce52d1dceb46e2f12909a48a6a753fe7cafbc44a0c"}, + {file = "pyarrow-16.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d32000693deff8dc5df444b032b5985a48592c0697cb6e3071a5d59888714e2"}, + {file = "pyarrow-16.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:8785bb10d5d6fd5e15d718ee1d1f914fe768bf8b4d1e5e9bf253de8a26cb1628"}, + {file = "pyarrow-16.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:e1369af39587b794873b8a307cc6623a3b1194e69399af0efd05bb202195a5a7"}, + {file = "pyarrow-16.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:febde33305f1498f6df85e8020bca496d0e9ebf2093bab9e0f65e2b4ae2b3444"}, + {file = "pyarrow-16.1.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:b5f5705ab977947a43ac83b52ade3b881eb6e95fcc02d76f501d549a210ba77f"}, + {file = "pyarrow-16.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0d27bf89dfc2576f6206e9cd6cf7a107c9c06dc13d53bbc25b0bd4556f19cf5f"}, + {file = "pyarrow-16.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d07de3ee730647a600037bc1d7b7994067ed64d0eba797ac74b2bc77384f4c2"}, + {file = "pyarrow-16.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fbef391b63f708e103df99fbaa3acf9f671d77a183a07546ba2f2c297b361e83"}, + {file = "pyarrow-16.1.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:19741c4dbbbc986d38856ee7ddfdd6a00fc3b0fc2d928795b95410d38bb97d15"}, + {file = "pyarrow-16.1.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:f2c5fb249caa17b94e2b9278b36a05ce03d3180e6da0c4c3b3ce5b2788f30eed"}, + {file = "pyarrow-16.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:e6b6d3cd35fbb93b70ade1336022cc1147b95ec6af7d36906ca7fe432eb09710"}, + {file = "pyarrow-16.1.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:18da9b76a36a954665ccca8aa6bd9f46c1145f79c0bb8f4f244f5f8e799bca55"}, + {file = "pyarrow-16.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:99f7549779b6e434467d2aa43ab2b7224dd9e41bdde486020bae198978c9e05e"}, + {file = "pyarrow-16.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f07fdffe4fd5b15f5ec15c8b64584868d063bc22b86b46c9695624ca3505b7b4"}, + {file = "pyarrow-16.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ddfe389a08ea374972bd4065d5f25d14e36b43ebc22fc75f7b951f24378bf0b5"}, + {file = "pyarrow-16.1.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:3b20bd67c94b3a2ea0a749d2a5712fc845a69cb5d52e78e6449bbd295611f3aa"}, + {file = "pyarrow-16.1.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:ba8ac20693c0bb0bf4b238751d4409e62852004a8cf031c73b0e0962b03e45e3"}, + {file = "pyarrow-16.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:31a1851751433d89a986616015841977e0a188662fcffd1a5677453f1df2de0a"}, + {file = "pyarrow-16.1.0.tar.gz", hash = "sha256:15fbb22ea96d11f0b5768504a3f961edab25eaf4197c341720c4a387f6c60315"}, ] [package.dependencies] numpy = ">=1.16.6" -[package.extras] -test = ["cffi", "hypothesis", "pandas", "pytest", "pytz"] - [[package]] name = "pyarrow-hotfix" version = "0.6" @@ -7906,6 +7940,7 @@ cffi = ["cffi (>=1.11)"] [extras] bigquery = ["db-dtypes", "google-cloud-bigquery", "google-cloud-bigquery-storage", "numpy", "pandas", "pyarrow", "pyarrow-hotfix", "pydata-google-auth", "rich"] clickhouse = ["clickhouse-connect", "numpy", "pandas", "pyarrow", "pyarrow-hotfix", "rich"] +databricks = ["databricks-sql-connector", "numpy", "pandas", "pyarrow", "pyarrow-hotfix", "rich"] datafusion = ["datafusion", "numpy", "pandas", "pyarrow", 
"pyarrow-hotfix", "rich"] decompiler = ["black"] deltalake = ["deltalake"] @@ -7931,4 +7966,4 @@ visualization = ["graphviz"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "affac0818092aef3a721fda1823e3f2b0b180db58a190bd0d472ec20aaa4c34c" +content-hash = "d548b0a143aa3d11f3a9aa08516ea69600be759d99f401097e4036b458bbfe72" diff --git a/pyproject.toml b/pyproject.toml index 39476b9b67f4..e6c125144703 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,6 +55,7 @@ clickhouse-connect = { version = ">=0.5.23,<1", optional = true, extras = [ "numpy", "pandas", ] } +databricks-sql-connector = { version = ">=3.4.0,<4", optional = true } datafusion = { version = ">=0.6,<42", optional = true } db-dtypes = { version = ">=0.3,<2", optional = true } deltalake = { version = ">=0.9.0,<1", optional = true } @@ -162,6 +163,14 @@ clickhouse = [ "pandas", "rich", ] +databricks = [ + "databricks-sql-connector", + "pyarrow", + "pyarrow-hotfix", + "numpy", + "pandas", + "rich", +] datafusion = [ "datafusion", "pyarrow", @@ -241,6 +250,7 @@ geospatial = ["geoarrow-types", "geopandas", "pyproj", "shapely"] [tool.poetry.plugins."ibis.backends"] bigquery = "ibis.backends.bigquery" clickhouse = "ibis.backends.clickhouse" +databricks = "ibis.backends.databricks" datafusion = "ibis.backends.datafusion" druid = "ibis.backends.druid" duckdb = "ibis.backends.duckdb" @@ -368,6 +378,7 @@ markers = [ "never: The backend will never support this / pass this test. Don't bother trying to fix it", "bigquery: BigQuery tests", "clickhouse: ClickHouse tests", + "databricks: Databricks SQL tests", "datafusion: Apache Datafusion tests", "druid: Apache Druid tests", "duckdb: DuckDB tests", diff --git a/requirements-dev.txt b/requirements-dev.txt index 066531666538..191f30cf57cf 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -42,6 +42,7 @@ coverage[toml]==7.6.1 ; python_version >= "3.10" and python_version < "4.0" crashtest==0.4.1 ; python_version >= "3.10" and python_version < "4.0" cryptography==43.0.1 ; python_version >= "3.10" and python_version < "4.0" cycler==0.12.1 ; python_version >= "3.10" and python_version < "3.13" +databricks-sql-connector==3.4.0 ; python_version >= "3.10" and python_version < "4.0" datafusion==41.0.0 ; python_version >= "3.10" and python_version < "4.0" db-dtypes==1.3.0 ; python_version >= "3.10" and python_version < "4.0" debugpy==1.8.5 ; python_version >= "3.10" and python_version < "3.13" @@ -53,6 +54,7 @@ doit==0.36.0 ; python_version >= "3.10" and python_version < "3.13" duckdb==1.1.1 ; python_version >= "3.10" and python_version < "4.0" dulwich==0.21.7 ; python_version >= "3.10" and python_version < "4.0" dunamai==1.22.0 ; python_version >= "3.10" and python_version < "4.0" +et-xmlfile==1.1.0 ; python_version >= "3.10" and python_version < "4.0" exceptiongroup==1.2.2 ; python_version >= "3.10" and python_version < "3.11" execnet==2.1.1 ; python_version >= "3.10" and python_version < "4.0" executing==2.1.0 ; python_version >= "3.10" and python_version < "4.0" @@ -145,8 +147,9 @@ nbformat==5.10.4 ; python_version >= "3.10" and python_version < "3.13" nest-asyncio==1.6.0 ; python_version >= "3.10" and python_version < "3.13" nodeenv==1.9.1 ; python_version >= "3.10" and python_version < "4.0" notebook-shim==0.2.4 ; python_version >= "3.10" and python_version < "3.13" -numpy==2.1.1 ; python_version >= "3.10" and python_version < "4.0" +numpy==1.26.4 ; python_version >= "3.10" and python_version < "4.0" oauthlib==3.2.2 ; python_version >= "3.10" and 
python_version < "4.0" +openpyxl==3.1.5 ; python_version >= "3.10" and python_version < "4.0" oracledb==2.4.1 ; python_version >= "3.10" and python_version < "4.0" overrides==7.7.0 ; python_version >= "3.10" and python_version < "3.13" packaging==24.1 ; python_version >= "3.10" and python_version < "4.0" @@ -187,7 +190,7 @@ pure-sasl==0.6.2 ; python_version >= "3.10" and python_version < "4.0" py-cpuinfo==9.0.0 ; python_version >= "3.10" and python_version < "4.0" py4j==0.10.9.7 ; python_version >= "3.10" and python_version < "4.0" pyarrow-hotfix==0.6 ; python_version >= "3.10" and python_version < "4.0" -pyarrow==17.0.0 ; python_version >= "3.10" and python_version < "4.0" +pyarrow==16.1.0 ; python_version >= "3.10" and python_version < "4.0" pyasn1-modules==0.4.1 ; python_version >= "3.10" and python_version < "4.0" pyasn1==0.6.1 ; python_version >= "3.10" and python_version < "4" pycparser==2.22 ; python_version >= "3.10" and python_version < "4.0"
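The pyproject.toml portion of this patch adds a "databricks" optional extra (databricks-sql-connector plus the usual numpy/pandas/pyarrow/pyarrow-hotfix/rich stack) and registers ibis.backends.databricks as a backend entrypoint, which is what makes ibis.databricks available at import time. As a minimal usage sketch under those assumptions — the connect arguments below follow the databricks-sql-connector convention (server_hostname, http_path, access_token) and the environment-variable names are illustrative, not confirmed by this diff — installing and connecting would look roughly like:

# pip install 'ibis-framework[databricks]'
import os

import ibis

# Hypothetical connection sketch: assumes the backend forwards the standard
# databricks-sql-connector arguments; the env var names are placeholders.
con = ibis.databricks.connect(
    server_hostname=os.environ["DATABRICKS_SERVER_HOSTNAME"],  # workspace hostname
    http_path=os.environ["DATABRICKS_HTTP_PATH"],  # SQL warehouse HTTP path
    access_token=os.environ["DATABRICKS_TOKEN"],  # personal access token
)

# Standard ibis usage once connected: lazily build an expression, then execute.
t = con.table("functional_alltypes")
print(t.limit(5).execute())

This mirrors how the other entrypoint-registered backends (e.g. ibis.duckdb, ibis.pyspark) are exposed; only the connection keyword names are an assumption here.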