From cff210af96323e200a31888b070d126be20a5eb0 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Wed, 7 Aug 2024 12:43:32 -0400 Subject: [PATCH] refactor(dependencies): pandas and numpy are now optional for non-backend installs (#9564) --- .github/workflows/ibis-main.yml | 8 +- .github/workflows/nix.yml | 11 +- flake.nix | 4 +- ibis/backends/bigquery/client.py | 19 +- ibis/backends/conftest.py | 51 +---- ibis/backends/duckdb/converter.py | 4 +- ibis/backends/duckdb/tests/test_client.py | 8 +- ibis/backends/mysql/__init__.py | 3 +- ibis/backends/oracle/__init__.py | 3 +- ibis/backends/polars/compiler.py | 8 +- ibis/backends/postgres/__init__.py | 4 +- ibis/backends/sqlite/converter.py | 3 +- ibis/backends/tests/base.py | 7 +- ibis/backends/tests/data.py | 4 +- ibis/backends/tests/test_aggregation.py | 5 +- ibis/backends/tests/test_array.py | 7 +- ibis/backends/tests/test_asof_join.py | 5 +- ibis/backends/tests/test_client.py | 53 ++++- ibis/backends/tests/test_conditionals.py | 12 +- ibis/backends/tests/test_dot_sql.py | 5 +- ibis/backends/tests/test_export.py | 2 +- ibis/backends/tests/test_generic.py | 7 +- ibis/backends/tests/test_join.py | 5 +- ibis/backends/tests/test_json.py | 5 +- ibis/backends/tests/test_map.py | 6 +- ibis/backends/tests/test_numeric.py | 5 +- ibis/backends/tests/test_param.py | 5 +- ibis/backends/tests/test_set_ops.py | 3 +- ibis/backends/tests/test_string.py | 5 +- ibis/backends/tests/test_struct.py | 7 +- ibis/backends/tests/test_temporal.py | 5 +- ibis/backends/tests/test_vectorized_udf.py | 5 +- ibis/backends/tests/test_window.py | 5 +- ibis/backends/tests/tpc/conftest.py | 3 +- ibis/common/temporal.py | 4 + ibis/common/tests/test_temporal.py | 73 ++++--- .../tests/test_pandas_numpy_value.py | 199 ++++++++++++++++++ ibis/expr/datatypes/tests/test_value.py | 161 -------------- ibis/expr/tests/test_api.py | 27 ++- ibis/expr/tests/test_format.py | 4 +- ibis/expr/tests/test_schema.py | 29 +-- ibis/expr/tests/test_sql.py | 2 + ibis/expr/types/core.py | 31 ++- ibis/expr/types/generic.py | 3 +- ibis/expr/types/relations.py | 15 +- ibis/formats/pandas.py | 11 +- ibis/formats/pyarrow.py | 142 +++++-------- ibis/formats/tests/test_numpy.py | 8 +- ibis/formats/tests/test_pandas.py | 9 +- ibis/legacy/udf/vectorized.py | 7 +- ibis/tests/expr/test_pretty_repr.py | 11 +- ibis/tests/expr/test_table.py | 44 ++-- ibis/tests/expr/test_timestamp.py | 45 ++-- ibis/tests/expr/test_value_exprs.py | 45 ++-- ibis/tests/expr/test_window_frames.py | 7 +- ibis/tests/strategies.py | 5 +- ibis/tests/test_strategies.py | 3 +- nix/ibis-core.nix | 14 ++ nix/ibis-local.nix | 19 ++ nix/ibis.nix | 33 ++- nix/overlay.nix | 10 +- poetry.lock | 42 ++-- pyproject.toml | 120 ++++++++--- 63 files changed, 794 insertions(+), 621 deletions(-) create mode 100644 ibis/expr/datatypes/tests/test_pandas_numpy_value.py create mode 100644 nix/ibis-core.nix create mode 100644 nix/ibis-local.nix diff --git a/.github/workflows/ibis-main.yml b/.github/workflows/ibis-main.yml index 16ffbc2ab553..9d386d86deb0 100644 --- a/.github/workflows/ibis-main.yml +++ b/.github/workflows/ibis-main.yml @@ -80,13 +80,13 @@ jobs: - name: install ibis run: poetry install --without dev --without docs --extras "visualization decompiler" - - name: install pyarrow + - name: install numpy/pandas/pyarrow if: matrix.pyarrow - run: poetry run pip install pyarrow pyarrow-hotfix + run: poetry run pip install numpy pandas pyarrow pyarrow-hotfix - - name: check pyarrow import + - name: check imports 
if: matrix.pyarrow - run: poetry run python -c 'import pyarrow, pyarrow_hotfix' + run: poetry run python -c 'import numpy, pandas, pyarrow, pyarrow_hotfix' - uses: extractions/setup-just@v2 env: diff --git a/.github/workflows/nix.yml b/.github/workflows/nix.yml index a967c1d3ab0c..9e8600b70f9f 100644 --- a/.github/workflows/nix.yml +++ b/.github/workflows/nix.yml @@ -59,12 +59,19 @@ jobs: name: ibis authToken: ${{ secrets.CACHIX_AUTH_TOKEN }} extraPullNames: nix-community,poetry2nix - - name: nix build and test + - name: nix build and test core run: | set -euo pipefail version='${{ matrix.python-version }}' - nix build ".#ibis${version//./}" --fallback --keep-going --print-build-logs + nix build ".#ibisCore${version//./}" --fallback --keep-going --print-build-logs + + - name: nix build and test local + run: | + set -euo pipefail + + version='${{ matrix.python-version }}' + nix build ".#ibisLocal${version//./}" --fallback --keep-going --print-build-logs - name: nix build devShell run: | diff --git a/flake.nix b/flake.nix index d7422a91f0ab..d3b3aa843db7 100644 --- a/flake.nix +++ b/flake.nix @@ -122,9 +122,9 @@ in rec { packages = { - inherit (pkgs) ibis310 ibis311 ibis312; + inherit (pkgs) ibisCore310 ibisCore311 ibisCore312 ibisLocal310 ibisLocal311 ibisLocal312; - default = pkgs.ibis312; + default = pkgs.ibisCore312; inherit (pkgs) update-lock-files gen-examples check-release-notes-spelling; }; diff --git a/ibis/backends/bigquery/client.py b/ibis/backends/bigquery/client.py index f414a7396e0d..28e8df8b7985 100644 --- a/ibis/backends/bigquery/client.py +++ b/ibis/backends/bigquery/client.py @@ -2,10 +2,11 @@ from __future__ import annotations +import contextlib import functools +import dateutil.parser import google.cloud.bigquery as bq -import pandas as pd import ibis.common.exceptions as com import ibis.expr.datatypes as dt @@ -69,9 +70,9 @@ def bq_param_array(dtype: dt.Array, value, name): @bigquery_param.register def bq_param_timestamp(_: dt.Timestamp, value, name): - # TODO(phillipc): Not sure if this is the correct way to do this. - timestamp_value = pd.Timestamp(value, tz="UTC").to_pydatetime() - return bq.ScalarQueryParameter(name, "TIMESTAMP", timestamp_value) + with contextlib.suppress(TypeError): + value = dateutil.parser.parse(value) + return bq.ScalarQueryParameter(name, "TIMESTAMP", value.isoformat()) @bigquery_param.register @@ -96,9 +97,13 @@ def bq_param_boolean(_: dt.Boolean, value, name): @bigquery_param.register def bq_param_date(_: dt.Date, value, name): - return bq.ScalarQueryParameter( - name, "DATE", pd.Timestamp(value).to_pydatetime().date() - ) + with contextlib.suppress(TypeError): + value = dateutil.parser.parse(value) + + with contextlib.suppress(AttributeError): + value = value.date() + + return bq.ScalarQueryParameter(name, "DATE", value.isoformat()) def rename_partitioned_column(table_expr, bq_table, partition_col): diff --git a/ibis/backends/conftest.py b/ibis/backends/conftest.py index 1d69af343588..99d4363a9859 100644 --- a/ibis/backends/conftest.py +++ b/ibis/backends/conftest.py @@ -10,7 +10,6 @@ from typing import TYPE_CHECKING, Any import _pytest -import pandas as pd import pytest from packaging.requirements import Requirement from packaging.version import parse as vparse @@ -571,7 +570,7 @@ def geo_df(geo): @pytest.fixture -def temp_table(con) -> str: +def temp_table(con): """Return a temporary table name. 
Parameters @@ -590,7 +589,7 @@ def temp_table(con) -> str: @pytest.fixture -def temp_table2(con) -> str: +def temp_table2(con): name = util.gen_name("temp_table2") yield name with contextlib.suppress(NotImplementedError): @@ -606,7 +605,7 @@ def temp_table_orig(con, temp_table): @pytest.fixture -def temp_view(ddl_con) -> str: +def temp_view(ddl_con): """Return a temporary view name. Parameters @@ -625,7 +624,7 @@ def temp_view(ddl_con) -> str: @pytest.fixture -def alternate_current_database(ddl_con, ddl_backend) -> str: +def alternate_current_database(ddl_con, ddl_backend): """Create a temporary database and yield its name. Drops the created database upon completion. @@ -648,48 +647,6 @@ def alternate_current_database(ddl_con, ddl_backend) -> str: ddl_con.drop_database(name, force=True) -@pytest.fixture -def test_employee_schema() -> ibis.schema: - sch = ibis.schema( - [ - ("first_name", "string"), - ("last_name", "string"), - ("department_name", "string"), - ("salary", "float64"), - ] - ) - - return sch - - -@pytest.fixture -def test_employee_data_1(): - df = pd.DataFrame( - { - "first_name": ["A", "B", "C"], - "last_name": ["D", "E", "F"], - "department_name": ["AA", "BB", "CC"], - "salary": [100.0, 200.0, 300.0], - } - ) - - return df - - -@pytest.fixture -def test_employee_data_2(): - df2 = pd.DataFrame( - { - "first_name": ["X", "Y", "Z"], - "last_name": ["A", "B", "C"], - "department_name": ["XX", "YY", "ZZ"], - "salary": [400.0, 500.0, 600.0], - } - ) - - return df2 - - @pytest.fixture def assert_sql(con, snapshot): def checker(expr, file_name="out.sql"): diff --git a/ibis/backends/duckdb/converter.py b/ibis/backends/duckdb/converter.py index a4277ca82760..7d88f3e0cf75 100644 --- a/ibis/backends/duckdb/converter.py +++ b/ibis/backends/duckdb/converter.py @@ -1,11 +1,9 @@ from __future__ import annotations -import numpy as np - from ibis.formats.pandas import PandasData class DuckDBPandasData(PandasData): @staticmethod def convert_Array(s, dtype, pandas_type): - return s.replace(np.nan, None) + return s.replace(float("nan"), None) diff --git a/ibis/backends/duckdb/tests/test_client.py b/ibis/backends/duckdb/tests/test_client.py index 3c937909158f..c8b4a4e91183 100644 --- a/ibis/backends/duckdb/tests/test_client.py +++ b/ibis/backends/duckdb/tests/test_client.py @@ -266,11 +266,13 @@ def test_connect_duckdb(url, tmp_path): @pytest.mark.parametrize( "out_method, extension", [("to_csv", "csv"), ("to_parquet", "parquet")] ) -def test_connect_local_file(out_method, extension, test_employee_data_1, tmp_path): - getattr(test_employee_data_1, out_method)(tmp_path / f"out.{extension}") +def test_connect_local_file(out_method, extension, tmp_path): + df = pd.DataFrame({"a": [1, 2, 3]}) + path = tmp_path / f"out.{extension}" + getattr(df, out_method)(path) with pytest.warns(FutureWarning, match="v9.1"): # ibis.connect uses con.register - con = ibis.connect(tmp_path / f"out.{extension}") + con = ibis.connect(path) t = next(iter(con.tables.values())) assert not t.head().execute().empty diff --git a/ibis/backends/mysql/__init__.py b/ibis/backends/mysql/__init__.py index 67dcb683a105..7b279fd1326f 100644 --- a/ibis/backends/mysql/__init__.py +++ b/ibis/backends/mysql/__init__.py @@ -10,7 +10,6 @@ from typing import TYPE_CHECKING, Any from urllib.parse import unquote_plus -import numpy as np import pymysql import sqlglot as sg import sqlglot.expressions as sge @@ -509,7 +508,7 @@ def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: df = op.data.to_frame() # nan can not be used with 
MySQL - df = df.replace(np.nan, None) + df = df.replace(float("nan"), None) data = df.itertuples(index=False) sql = self._build_insert_template( diff --git a/ibis/backends/oracle/__init__.py b/ibis/backends/oracle/__init__.py index a7831e53f0bb..c4d43f280ac8 100644 --- a/ibis/backends/oracle/__init__.py +++ b/ibis/backends/oracle/__init__.py @@ -11,7 +11,6 @@ from typing import TYPE_CHECKING, Any from urllib.parse import unquote_plus -import numpy as np import oracledb import sqlglot as sg import sqlglot.expressions as sge @@ -534,7 +533,7 @@ def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: properties=sge.Properties(expressions=[sge.TemporaryProperty()]), ).sql(self.name) - data = op.data.to_frame().replace({np.nan: None}) + data = op.data.to_frame().replace(float("nan"), None) insert_stmt = self._build_insert_template( name, schema=schema, placeholder=":{i:d}" ) diff --git a/ibis/backends/polars/compiler.py b/ibis/backends/polars/compiler.py index 6a9fe4a3baf9..ee7ba1f03b69 100644 --- a/ibis/backends/polars/compiler.py +++ b/ibis/backends/polars/compiler.py @@ -8,8 +8,6 @@ from functools import partial, reduce, singledispatch from math import isnan -import numpy as np -import pandas as pd import polars as pl import ibis.common.exceptions as com @@ -834,7 +832,7 @@ def count_star(op, **kw): @translate.register(ops.TimestampNow) def timestamp_now(op, **_): - return pl.lit(pd.Timestamp("now", tz="UTC").tz_localize(None)) + return pl.lit(datetime.datetime.now()) @translate.register(ops.DateNow) @@ -1175,12 +1173,12 @@ def elementwise_udf(op, **kw): @translate.register(ops.E) def execute_e(op, **_): - return pl.lit(np.e) + return pl.lit(math.e) @translate.register(ops.Pi) def execute_pi(op, **_): - return pl.lit(np.pi) + return pl.lit(math.pi) @translate.register(ops.Time) diff --git a/ibis/backends/postgres/__init__.py b/ibis/backends/postgres/__init__.py index 12ec7342d01b..616480757336 100644 --- a/ibis/backends/postgres/__init__.py +++ b/ibis/backends/postgres/__init__.py @@ -8,8 +8,6 @@ from typing import TYPE_CHECKING, Any from urllib.parse import unquote_plus -import numpy as np -import pandas as pd import sqlglot as sg import sqlglot.expressions as sge from pandas.api.types import is_float_dtype @@ -139,7 +137,7 @@ def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: convert_df = df.convert_dtypes() for col in convert_df.columns: if not is_float_dtype(convert_df[col]): - df[col] = df[col].replace(np.nan, None) + df[col] = df[col].replace(float("nan"), None) data = df.itertuples(index=False) sql = self._build_insert_template( diff --git a/ibis/backends/sqlite/converter.py b/ibis/backends/sqlite/converter.py index 555fa56b5275..63516dc591a4 100644 --- a/ibis/backends/sqlite/converter.py +++ b/ibis/backends/sqlite/converter.py @@ -1,11 +1,12 @@ from __future__ import annotations import pandas as pd +from packaging.version import parse as vparse from ibis.formats.pandas import PandasData # The "mixed" format was added in pandas 2 -_DATETIME_FORMAT = "mixed" if pd.__version__ >= "2.0.0" else None +_DATETIME_FORMAT = "mixed" if vparse(pd.__version__) >= vparse("2.0.0") else None class SQLitePandasData(PandasData): diff --git a/ibis/backends/tests/base.py b/ibis/backends/tests/base.py index 5565ce71a1ca..e3a1591c3fa8 100644 --- a/ibis/backends/tests/base.py +++ b/ibis/backends/tests/base.py @@ -8,9 +8,6 @@ from pathlib import Path from typing import TYPE_CHECKING, Any, Literal -import numpy as np -import pandas as pd -import pandas.testing as tm import pytest 
from filelock import FileLock @@ -22,6 +19,10 @@ PYTHON_SHORT_VERSION = f"{sys.version_info.major}{sys.version_info.minor}" +np = pytest.importorskip("numpy") +pd = pytest.importorskip("pandas") +tm = pytest.importorskip("pandas.testing") + class BackendTest(abc.ABC): """ diff --git a/ibis/backends/tests/data.py b/ibis/backends/tests/data.py index 156550f78f93..e3a51ed83693 100644 --- a/ibis/backends/tests/data.py +++ b/ibis/backends/tests/data.py @@ -1,9 +1,9 @@ from __future__ import annotations -import numpy as np -import pandas as pd import pytest +np = pytest.importorskip("numpy") +pd = pytest.importorskip("pandas") pa = pytest.importorskip("pyarrow") array_types = pd.DataFrame( diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index 992251c6b90f..c2caf15dfa8b 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -3,8 +3,6 @@ from datetime import date from operator import methodcaller -import numpy as np -import pandas as pd import pytest from pytest import param @@ -32,6 +30,9 @@ ) from ibis.legacy.udf.vectorized import reduction +np = pytest.importorskip("numpy") +pd = pytest.importorskip("pandas") + with pytest.warns(FutureWarning, match="v9.0"): @reduction(input_type=[dt.double], output_type=dt.double) diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py index 3c66946e22dd..fe38b31f6fa8 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -6,9 +6,6 @@ from datetime import datetime from functools import partial -import numpy as np -import pandas as pd -import pandas.testing as tm import pytest import pytz import toolz @@ -35,6 +32,10 @@ ) from ibis.common.collections import frozendict +np = pytest.importorskip("numpy") +pd = pytest.importorskip("pandas") +tm = pytest.importorskip("pandas.testing") + pytestmark = [ pytest.mark.never( ["sqlite", "mysql", "exasol"], reason="No array support", raises=Exception diff --git a/ibis/backends/tests/test_asof_join.py b/ibis/backends/tests/test_asof_join.py index 6832c8ff6527..a2c063f6a6be 100644 --- a/ibis/backends/tests/test_asof_join.py +++ b/ibis/backends/tests/test_asof_join.py @@ -2,13 +2,14 @@ import operator -import pandas as pd -import pandas.testing as tm import pytest import ibis from ibis.backends.tests.errors import DuckDBInvalidInputException +pd = pytest.importorskip("pandas") +tm = pytest.importorskip("pandas.testing") + @pytest.fixture(scope="module") def time_df1(): diff --git a/ibis/backends/tests/test_client.py b/ibis/backends/tests/test_client.py index 8ae35e6353b7..4c875524e3d1 100644 --- a/ibis/backends/tests/test_client.py +++ b/ibis/backends/tests/test_client.py @@ -13,8 +13,6 @@ from operator import itemgetter from typing import TYPE_CHECKING -import numpy as np -import pandas as pd import pytest import rich.console import toolz @@ -46,6 +44,8 @@ from ibis.backends import BaseBackend +np = pytest.importorskip("numpy") +pd = pytest.importorskip("pandas") pa = pytest.importorskip("pyarrow") ds = pytest.importorskip("pyarrow.dataset") @@ -428,6 +428,18 @@ def test_create_drop_view(ddl_con, temp_view): assert set(t_expr.schema().names) == set(v_expr.schema().names) +@pytest.fixture +def test_employee_schema() -> ibis.schema: + return ibis.schema( + { + "first_name": "string", + "last_name": "string", + "department_name": "string", + "salary": "float64", + } + ) + + @pytest.fixture def employee_empty_temp_table(backend, con, test_employee_schema): temp_table_name = 
gen_name("temp_employee_empty_table") @@ -437,9 +449,16 @@ def employee_empty_temp_table(backend, con, test_employee_schema): @pytest.fixture -def employee_data_1_temp_table( - backend, con, test_employee_schema, test_employee_data_1 -): +def employee_data_1_temp_table(backend, con, test_employee_schema): + test_employee_data_1 = pd.DataFrame( + { + "first_name": ["A", "B", "C"], + "last_name": ["D", "E", "F"], + "department_name": ["AA", "BB", "CC"], + "salary": [100.0, 200.0, 300.0], + } + ) + temp_table_name = gen_name("temp_employee_data_1") _create_temp_table_with_schema( backend, con, temp_table_name, test_employee_schema, data=test_employee_data_1 @@ -449,6 +468,22 @@ def employee_data_1_temp_table( con.drop_table(temp_table_name, force=True) +@pytest.fixture +def test_employee_data_2(): + import pandas as pd + + df2 = pd.DataFrame( + { + "first_name": ["X", "Y", "Z"], + "last_name": ["A", "B", "C"], + "department_name": ["XX", "YY", "ZZ"], + "salary": [400.0, 500.0, 600.0], + } + ) + + return df2 + + @pytest.fixture def employee_data_2_temp_table( backend, con, test_employee_schema, test_employee_data_2 @@ -1117,6 +1152,8 @@ def test_dunder_array_column(alltypes, dtype): @pytest.mark.parametrize("interactive", [True, False]) def test_repr(alltypes, interactive, monkeypatch): + pytest.importorskip("rich") + monkeypatch.setattr(ibis.options, "interactive", interactive) expr = alltypes.select("date_string_col") @@ -1132,6 +1169,8 @@ def test_repr(alltypes, interactive, monkeypatch): @pytest.mark.parametrize("show_types", [True, False]) def test_interactive_repr_show_types(alltypes, show_types, monkeypatch): + pytest.importorskip("rich") + monkeypatch.setattr(ibis.options, "interactive", True) monkeypatch.setattr(ibis.options.repr.interactive, "show_types", show_types) @@ -1145,6 +1184,8 @@ def test_interactive_repr_show_types(alltypes, show_types, monkeypatch): @pytest.mark.parametrize("is_jupyter", [True, False]) def test_interactive_repr_max_columns(alltypes, is_jupyter, monkeypatch): + pytest.importorskip("rich") + monkeypatch.setattr(ibis.options, "interactive", True) cols = {f"c_{i}": ibis._.id + i for i in range(50)} @@ -1184,6 +1225,8 @@ def test_interactive_repr_max_columns(alltypes, is_jupyter, monkeypatch): @pytest.mark.parametrize("expr_type", ["table", "column"]) @pytest.mark.parametrize("interactive", [True, False]) def test_repr_mimebundle(alltypes, interactive, expr_type, monkeypatch): + pytest.importorskip("rich") + monkeypatch.setattr(ibis.options, "interactive", interactive) if expr_type == "column": diff --git a/ibis/backends/tests/test_conditionals.py b/ibis/backends/tests/test_conditionals.py index b9128208fe23..68d6ca7c6f00 100644 --- a/ibis/backends/tests/test_conditionals.py +++ b/ibis/backends/tests/test_conditionals.py @@ -2,8 +2,6 @@ from collections import Counter -import numpy as np -import pandas as pd import pytest import ibis @@ -33,6 +31,9 @@ def test_ifelse_select(backend, alltypes, df): def test_ifelse_column(backend, alltypes, df): + np = pytest.importorskip("numpy") + pd = pytest.importorskip("pandas") + expr = ibis.ifelse(alltypes["int_col"] == 0, 42, -1).cast("int64").name("where_col") result = expr.execute() @@ -79,7 +80,10 @@ def test_substitute(backend): ], ) def test_value_cases_scalar(con, inp, exp): + pd = pytest.importorskip("pandas") + result = con.execute(inp()) + if exp is None: assert pd.isna(result) else: @@ -92,6 +96,8 @@ def test_value_cases_scalar(con, inp, exp): raises=AssertionError, ) def test_value_cases_column(batting): + np = 
pytest.importorskip("numpy") + df = batting.to_pandas() expr = ( batting.RBI.case() @@ -123,6 +129,8 @@ def test_ibis_cases_scalar(): raises=TypeError, ) def test_ibis_cases_column(batting): + np = pytest.importorskip("numpy") + t = batting df = batting.to_pandas() expr = ( diff --git a/ibis/backends/tests/test_dot_sql.py b/ibis/backends/tests/test_dot_sql.py index 3cae14e37017..cb6716bf85f9 100644 --- a/ibis/backends/tests/test_dot_sql.py +++ b/ibis/backends/tests/test_dot_sql.py @@ -3,8 +3,6 @@ import contextlib import getpass -import pandas as pd -import pandas.testing as tm import pytest import sqlglot as sg from pytest import param @@ -17,6 +15,9 @@ from ibis.backends.tests.base import PYTHON_SHORT_VERSION from ibis.backends.tests.errors import GoogleBadRequest, OracleDatabaseError +pd = pytest.importorskip("pandas") +tm = pytest.importorskip("pandas.testing") + dot_sql_never = pytest.mark.never( ["dask", "pandas"], reason="dask and pandas do not accept SQL" ) diff --git a/ibis/backends/tests/test_export.py b/ibis/backends/tests/test_export.py index 8d067959dbd1..b59e94e5907a 100644 --- a/ibis/backends/tests/test_export.py +++ b/ibis/backends/tests/test_export.py @@ -2,7 +2,6 @@ from operator import methodcaller -import pandas as pd import pytest from packaging.version import parse as vparse from pytest import param @@ -25,6 +24,7 @@ TrinoUserError, ) +pd = pytest.importorskip("pandas") pa = pytest.importorskip("pyarrow") limit = [ diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index 9be4dd25a6c0..09a8a705c60b 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -6,9 +6,6 @@ from collections import Counter from operator import invert, methodcaller, neg -import numpy as np -import pandas as pd -import pandas.testing as tm import pytest import toolz from pytest import param @@ -38,6 +35,10 @@ ) from ibis.common.annotations import ValidationError +np = pytest.importorskip("numpy") +pd = pytest.importorskip("pandas") +tm = pytest.importorskip("pandas.testing") + NULL_BACKEND_TYPES = { "bigquery": "NULL", "clickhouse": "Nullable(Nothing)", diff --git a/ibis/backends/tests/test_join.py b/ibis/backends/tests/test_join.py index 13f05bfe6186..272e47efac71 100644 --- a/ibis/backends/tests/test_join.py +++ b/ibis/backends/tests/test_join.py @@ -2,8 +2,6 @@ import sqlite3 -import numpy as np -import pandas as pd import pytest from packaging.version import parse as vparse from pytest import param @@ -13,6 +11,9 @@ import ibis.expr.schema as sch from ibis.backends.tests.errors import PyDruidProgrammingError +np = pytest.importorskip("numpy") +pd = pytest.importorskip("pandas") + sqlite_right_or_full_mark = pytest.mark.notyet( ["sqlite"], condition=vparse(sqlite3.sqlite_version) < vparse("3.39"), diff --git a/ibis/backends/tests/test_json.py b/ibis/backends/tests/test_json.py index 8d2b3e86e9af..e8ce57657cba 100644 --- a/ibis/backends/tests/test_json.py +++ b/ibis/backends/tests/test_json.py @@ -4,13 +4,14 @@ import sqlite3 -import numpy as np -import pandas as pd import pytest from packaging.version import parse as vparse import ibis.expr.types as ir +np = pytest.importorskip("numpy") +pd = pytest.importorskip("pandas") + pytestmark = [ pytest.mark.never(["impala"], reason="doesn't support JSON and never will"), pytest.mark.notyet(["clickhouse"], reason="upstream is broken"), diff --git a/ibis/backends/tests/test_map.py b/ibis/backends/tests/test_map.py index 2f4473830d35..c212cc08c59c 100644 --- 
a/ibis/backends/tests/test_map.py +++ b/ibis/backends/tests/test_map.py @@ -1,8 +1,5 @@ from __future__ import annotations -import numpy as np -import pandas as pd -import pandas.testing as tm import pytest from pytest import param @@ -11,6 +8,9 @@ import ibis.expr.datatypes as dt from ibis.backends.tests.errors import PsycoPg2InternalError, Py4JJavaError +np = pytest.importorskip("numpy") +pd = pytest.importorskip("pandas") +tm = pytest.importorskip("pandas.testing") pa = pytest.importorskip("pyarrow") pytestmark = [ diff --git a/ibis/backends/tests/test_numeric.py b/ibis/backends/tests/test_numeric.py index 12b8a8a58096..cf9e43dc75ac 100644 --- a/ibis/backends/tests/test_numeric.py +++ b/ibis/backends/tests/test_numeric.py @@ -6,8 +6,6 @@ import operator from operator import and_, lshift, or_, rshift, xor -import numpy as np -import pandas as pd import pytest from pytest import param @@ -36,6 +34,9 @@ ) from ibis.expr import datatypes as dt +np = pytest.importorskip("numpy") +pd = pytest.importorskip("pandas") + @pytest.mark.parametrize( ("expr", "expected_types"), diff --git a/ibis/backends/tests/test_param.py b/ibis/backends/tests/test_param.py index 75b1b4a6e3e8..6ec0acfb2604 100644 --- a/ibis/backends/tests/test_param.py +++ b/ibis/backends/tests/test_param.py @@ -3,8 +3,6 @@ import datetime from collections import OrderedDict -import numpy as np -import pandas as pd import pytest from pytest import param @@ -13,6 +11,9 @@ from ibis import _ from ibis.backends.tests.errors import OracleDatabaseError, PsycoPg2InternalError +np = pytest.importorskip("numpy") +pd = pytest.importorskip("pandas") + @pytest.mark.parametrize( ("column", "raw_value"), diff --git a/ibis/backends/tests/test_set_ops.py b/ibis/backends/tests/test_set_ops.py index adb08a93c34f..e66b6dab8a27 100644 --- a/ibis/backends/tests/test_set_ops.py +++ b/ibis/backends/tests/test_set_ops.py @@ -2,7 +2,6 @@ import random -import pandas as pd import pytest from pytest import param @@ -12,6 +11,8 @@ from ibis import _ from ibis.backends.tests.errors import PsycoPg2InternalError, PyDruidProgrammingError +pd = pytest.importorskip("pandas") + @pytest.fixture def union_subsets(alltypes, df): diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py index e4ffabbbc58b..62db1053a2dc 100644 --- a/ibis/backends/tests/test_string.py +++ b/ibis/backends/tests/test_string.py @@ -4,8 +4,6 @@ from functools import reduce from operator import add -import numpy as np -import pandas as pd import pytest from pytest import param @@ -20,6 +18,9 @@ ) from ibis.common.annotations import ValidationError +np = pytest.importorskip("numpy") +pd = pytest.importorskip("pandas") + @pytest.mark.parametrize( ("text_value", "expected_types"), diff --git a/ibis/backends/tests/test_struct.py b/ibis/backends/tests/test_struct.py index e1ba89162b2a..5eb8b95d721f 100644 --- a/ibis/backends/tests/test_struct.py +++ b/ibis/backends/tests/test_struct.py @@ -3,9 +3,6 @@ import contextlib from collections.abc import Mapping -import numpy as np -import pandas as pd -import pandas.testing as tm import pytest from pytest import param @@ -21,6 +18,10 @@ ) from ibis.common.exceptions import IbisError +np = pytest.importorskip("numpy") +pd = pytest.importorskip("pandas") +tm = pytest.importorskip("pandas.testing") + pytestmark = [ pytest.mark.never(["mysql", "sqlite", "mssql"], reason="No struct support"), pytest.mark.notyet(["impala"]), diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index 
56c76125ea80..d1bfdd82efd0 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -6,8 +6,6 @@ import warnings from operator import methodcaller -import numpy as np -import pandas as pd import pytest import sqlglot as sg import toolz @@ -41,6 +39,9 @@ ) from ibis.common.annotations import ValidationError +np = pytest.importorskip("numpy") +pd = pytest.importorskip("pandas") + @pytest.mark.parametrize("attr", ["year", "month", "day"]) @pytest.mark.parametrize( diff --git a/ibis/backends/tests/test_vectorized_udf.py b/ibis/backends/tests/test_vectorized_udf.py index 1973b1c10439..f47d1f57ac77 100644 --- a/ibis/backends/tests/test_vectorized_udf.py +++ b/ibis/backends/tests/test_vectorized_udf.py @@ -1,7 +1,5 @@ from __future__ import annotations -import numpy as np -import pandas as pd import pytest from pytest import param @@ -11,6 +9,9 @@ from ibis.backends.conftest import is_older_than from ibis.legacy.udf.vectorized import analytic, elementwise, reduction +np = pytest.importorskip("numpy") +pd = pytest.importorskip("pandas") + pytestmark = pytest.mark.notimpl(["druid", "oracle", "risingwave"]) diff --git a/ibis/backends/tests/test_window.py b/ibis/backends/tests/test_window.py index 80756473f9ba..cd67d97c2404 100644 --- a/ibis/backends/tests/test_window.py +++ b/ibis/backends/tests/test_window.py @@ -3,8 +3,6 @@ from functools import partial from operator import methodcaller -import numpy as np -import pandas as pd import pytest from pytest import param @@ -25,6 +23,9 @@ ) from ibis.legacy.udf.vectorized import analytic, reduction +np = pytest.importorskip("numpy") +pd = pytest.importorskip("pandas") + pytestmark = [ pytest.mark.notimpl( ["druid"], raises=(com.OperationNotDefinedError, PyDruidProgrammingError) diff --git a/ibis/backends/tests/tpc/conftest.py b/ibis/backends/tests/tpc/conftest.py index b14bc973e546..979890d2c4a5 100644 --- a/ibis/backends/tests/tpc/conftest.py +++ b/ibis/backends/tests/tpc/conftest.py @@ -11,7 +11,6 @@ from dateutil.relativedelta import relativedelta import ibis -from ibis.formats.pandas import PandasData if TYPE_CHECKING: from collections.abc import Callable @@ -66,6 +65,8 @@ def inner(test: Callable[..., ir.Table]): @getattr(pytest.mark, name) @functools.wraps(test) def wrapper(*args, backend, **kwargs): + from ibis.formats.pandas import PandasData + backend_name = backend.name() if not getattr(backend, f"supports_{name}"): pytest.skip( diff --git a/ibis/common/temporal.py b/ibis/common/temporal.py index 9eaf6325cc4a..1fa3cfcad9aa 100644 --- a/ibis/common/temporal.py +++ b/ibis/common/temporal.py @@ -1,5 +1,6 @@ from __future__ import annotations +import contextlib import datetime import numbers from decimal import Decimal @@ -168,6 +169,9 @@ def normalize_timedelta( 3000000 """ + with contextlib.suppress(AttributeError): + value = value.item() + if isinstance(value, datetime.timedelta): # datetime.timedelta only stores days, seconds, and microseconds internally if value.days and not (value.seconds or value.microseconds): diff --git a/ibis/common/tests/test_temporal.py b/ibis/common/tests/test_temporal.py index bed57b260574..2def166bb8e4 100644 --- a/ibis/common/tests/test_temporal.py +++ b/ibis/common/tests/test_temporal.py @@ -4,11 +4,8 @@ from datetime import date, datetime, time, timedelta, timezone import dateutil -import pandas as pd import pytest import pytz -from packaging.version import parse as vparse -from pytest import param from ibis.common.patterns import CoercedTo from ibis.common.temporal import ( 
@@ -152,18 +149,6 @@ def test_normalize_timezone(value, expected): ), # date object (datetime(2017, 1, 1).date(), datetime(2017, 1, 1)), - # pandas timestamp object - (pd.Timestamp("2017-01-01"), datetime(2017, 1, 1)), - (pd.Timestamp("2017-01-01 00:00:00.000001"), datetime(2017, 1, 1, 0, 0, 0, 1)), - # pandas timestamp object with timezone - ( - pd.Timestamp("2017-01-01 00:00:00.000001+00:00"), - datetime(2017, 1, 1, 0, 0, 0, 1, tzinfo=dateutil.tz.UTC), - ), - ( - pd.Timestamp("2017-01-01 00:00:00.000001+01:00"), - datetime(2017, 1, 1, 0, 0, 0, 1, tzinfo=dateutil.tz.tzoffset(None, 3600)), - ), # datetime string ("2017-01-01", datetime(2017, 1, 1)), ("2017-01-01 00:00:00.000001", datetime(2017, 1, 1, 0, 0, 0, 1)), @@ -196,6 +181,27 @@ def test_normalize_datetime(value, expected): assert result == expected +@pytest.mark.parametrize( + ("value", "expected"), + [ + ("2017-01-01", datetime(2017, 1, 1)), + ("2017-01-01 00:00:00.000001", datetime(2017, 1, 1, 0, 0, 0, 1)), + ( + "2017-01-01 00:00:00.000001+00:00", + datetime(2017, 1, 1, 0, 0, 0, 1, tzinfo=dateutil.tz.UTC), + ), + ( + "2017-01-01 00:00:00.000001+01:00", + datetime(2017, 1, 1, 0, 0, 0, 1, tzinfo=dateutil.tz.tzoffset(None, 3600)), + ), + ], +) +def test_normalize_datetime_pandas(value, expected): + pd = pytest.importorskip("pandas") + result = normalize_datetime(pd.Timestamp(value)) + assert result == expected + + def test_normalize_datetime_with_time(mocker): import datetime @@ -233,22 +239,31 @@ def today(cls) -> date: ), # timezone aware datetime with timezone name (datetime(2022, 1, 1, 0, 0, 0, 1, tzinfo=dateutil.tz.gettz("CET")), "CET"), - # pandas timestamp with timezone - (pd.Timestamp("2022-01-01 00:00:00.000001+00:00"), "UTC"), - param( - pd.Timestamp("2022-01-01 00:00:00.000001+01:00"), - "UTC+01:00", - marks=pytest.mark.xfail( - vparse(pd.__version__) < vparse("2.0.0") and not WINDOWS, - reason=( - "tzdata is missing in pandas < 2.0.0 due to an incorrect marker " - "in the tzdata package specification that restricts its installation " - "to windows only" - ), - ), - ), ], ) def test_normalized_datetime_tzname(value, expected): result = normalize_datetime(value) assert result.tzname() == expected + + +def test_normalized_datetime_tzname_pandas(): + pd = pytest.importorskip("pandas") + + result = normalize_datetime(pd.Timestamp("2022-01-01 00:00:00.000001+00:00")) + assert result.tzname() == "UTC" + + +def test_normalized_datetime_tzname_pandas_non_utc(): + from packaging.version import parse as vparse + + pd = pytest.importorskip("pandas") + + if vparse(pd.__version__) < vparse("2.0.0") and not WINDOWS: + pytest.xfail( + "tzdata is missing in pandas < 2.0.0 due to an incorrect marker " + "in the tzdata package specification that restricts its installation " + "to windows only" + ) + + result = normalize_datetime(pd.Timestamp("2022-01-01 00:00:00.000001+01:00")) + assert result.tzname() == "UTC+01:00" diff --git a/ibis/expr/datatypes/tests/test_pandas_numpy_value.py b/ibis/expr/datatypes/tests/test_pandas_numpy_value.py new file mode 100644 index 000000000000..ba34038ea753 --- /dev/null +++ b/ibis/expr/datatypes/tests/test_pandas_numpy_value.py @@ -0,0 +1,199 @@ +from __future__ import annotations + +from datetime import date, datetime + +import pytest +from packaging.version import parse as vparse + +import ibis.expr.datatypes as dt + +np = pytest.importorskip("numpy") +pd = pytest.importorskip("pandas") + + +@pytest.mark.parametrize( + ("value", "expected_dtype"), + [ + (pd.Timedelta("5 hours"), dt.Interval(unit="h")), + 
(pd.Timedelta("7 minutes"), dt.Interval(unit="m")), + (pd.Timedelta("11 milliseconds"), dt.Interval(unit="ms")), + (pd.Timedelta("17 nanoseconds"), dt.Interval(unit="ns")), + # numpy types + (np.int8(5), dt.int8), + (np.int16(-1), dt.int16), + (np.int32(2), dt.int32), + (np.int64(-5), dt.int64), + (np.uint8(5), dt.uint8), + (np.uint16(50), dt.uint16), + (np.uint32(500), dt.uint32), + (np.uint64(5000), dt.uint64), + (np.float32(5.5), dt.float32), + (np.float64(5.55), dt.float64), + (np.bool_(True), dt.boolean), + (np.bool_(False), dt.boolean), + # pandas types + ( + pd.Timestamp("2015-01-01 12:00:00", tz="US/Eastern"), + dt.Timestamp("US/Eastern"), + ), + ], +) +def test_infer_dtype(value, expected_dtype): + assert dt.infer(value) == expected_dtype + + +# str, pd.Timestamp, datetime, np.datetime64, numbers.Real +@pytest.mark.parametrize( + ("value", "expected"), + [ + (pd.Timestamp("2019-01-01"), datetime(2019, 1, 1)), + (pd.Timestamp("2019-01-01 00:00:00"), datetime(2019, 1, 1)), + (pd.Timestamp("2019-01-01 01:02:03.000004"), datetime(2019, 1, 1, 1, 2, 3, 4)), + (np.datetime64("2019-01-01"), datetime(2019, 1, 1)), + (np.datetime64("2019-01-01 01:02:03"), datetime(2019, 1, 1, 1, 2, 3)), + ], +) +def test_normalize_timestamp(value, expected): + normalized = dt.normalize(dt.timestamp, value) + assert normalized == expected + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (pd.Timestamp("2019-01-01"), date(2019, 1, 1)), + (pd.Timestamp("2019-01-01 00:00:00"), date(2019, 1, 1)), + (pd.Timestamp("2019-01-01 01:02:03.000004"), date(2019, 1, 1)), + (np.datetime64("2019-01-01"), date(2019, 1, 1)), + (np.datetime64("2019-01-01 01:02:03"), date(2019, 1, 1)), + ], +) +def test_normalize_date(value, expected): + normalized = dt.normalize(dt.date, value) + assert normalized == expected + + +@pytest.mark.parametrize( + ("value", "expected_dtype"), + [ + # numpy types + (np.int8(5), dt.int8), + (np.int16(-1), dt.int16), + (np.int32(2), dt.int32), + (np.int64(-5), dt.int64), + (np.uint8(5), dt.uint8), + (np.uint16(50), dt.uint16), + (np.uint32(500), dt.uint32), + (np.uint64(5000), dt.uint64), + (np.float32(5.5), dt.float32), + (np.float64(5.55), dt.float64), + (np.bool_(True), dt.boolean), + (np.bool_(False), dt.boolean), + # pandas types + ( + pd.Timestamp("2015-01-01 12:00:00", tz="US/Eastern"), + dt.Timestamp("US/Eastern"), + ), + ], +) +def test_infer_numpy_scalar(value, expected_dtype): + assert dt.infer(value) == expected_dtype + + +@pytest.mark.parametrize( + ("numpy_dtype", "ibis_dtype"), + [ + (np.bool_, dt.boolean), + (np.int8, dt.int8), + (np.int16, dt.int16), + (np.int32, dt.int32), + (np.int64, dt.int64), + (np.uint8, dt.uint8), + (np.uint16, dt.uint16), + (np.uint32, dt.uint32), + (np.uint64, dt.uint64), + (np.float16, dt.float16), + (np.float32, dt.float32), + (np.float64, dt.float64), + (np.double, dt.double), + (np.str_, dt.string), + (np.datetime64, dt.timestamp), + ], +) +def test_from_numpy_dtype(numpy_dtype, ibis_dtype): + numpy_dtype = np.dtype(numpy_dtype) + assert dt.DataType.from_numpy(numpy_dtype) == ibis_dtype + assert dt.dtype(numpy_dtype) == ibis_dtype + + +def test_from_numpy_timedelta(): + if vparse(pytest.importorskip("pyarrow").__version__) < vparse("9"): + pytest.skip("pyarrow < 9 globally mutates the timedelta64 numpy dtype") + + numpy_dtype = np.dtype(np.timedelta64) + assert dt.DataType.from_numpy(numpy_dtype) == dt.Interval("s") + assert dt.dtype(numpy_dtype) == dt.Interval("s") + + +@pytest.mark.parametrize( + ("numpy_array", "expected_dtypes"), + [ + # 
Explicitly-defined dtype + (np.array([1, 2, 3], dtype="int8"), (dt.Array(dt.int8),)), + (np.array([1, 2, 3], dtype="int16"), (dt.Array(dt.int16),)), + (np.array([1, 2, 3], dtype="int32"), (dt.Array(dt.int32),)), + (np.array([1, 2, 3], dtype="int64"), (dt.Array(dt.int64),)), + (np.array([1, 2, 3], dtype="uint8"), (dt.Array(dt.uint8),)), + (np.array([1, 2, 3], dtype="uint16"), (dt.Array(dt.uint16),)), + (np.array([1, 2, 3], dtype="uint32"), (dt.Array(dt.uint32),)), + (np.array([1, 2, 3], dtype="uint64"), (dt.Array(dt.uint64),)), + (np.array([1.0, 2.0, 3.0], dtype="float32"), (dt.Array(dt.float32),)), + (np.array([1.0, 2.0, 3.0], dtype="float64"), (dt.Array(dt.float64),)), + (np.array([True, False, True], dtype="bool"), (dt.Array(dt.boolean),)), + # Implicit dtype + # Integer array could be inferred to int64 or int32 depending on system + (np.array([1, 2, 3]), (dt.Array(dt.int64), dt.Array(dt.int32))), + (np.array([1.0, 2.0, 3.0]), (dt.Array(dt.float64),)), + (np.array([np.nan, np.nan, np.nan]), (dt.Array(dt.float64),)), + (np.array([True, False, True]), (dt.Array(dt.boolean),)), + (np.array(["1", "2", "3"]), (dt.Array(dt.string),)), + ( + np.array( + [ + pd.Timestamp("2015-01-01 12:00:00"), + pd.Timestamp("2015-01-02 12:00:00"), + pd.Timestamp("2015-01-03 12:00:00"), + ] + ), + (dt.Array(dt.Timestamp()), dt.Array(dt.Timestamp(scale=6))), + ), + # Implied from object dtype + (np.array([1, 2, 3], dtype=object), (dt.Array(dt.int64),)), + (np.array([1.0, 2.0, 3.0], dtype=object), (dt.Array(dt.float64),)), + (np.array([True, False, True], dtype=object), (dt.Array(dt.boolean),)), + (np.array(["1", "2", "3"], dtype=object), (dt.Array(dt.string),)), + ( + np.array( + [ + pd.Timestamp("2015-01-01 12:00:00"), + pd.Timestamp("2015-01-02 12:00:00"), + pd.Timestamp("2015-01-03 12:00:00"), + ], + dtype=object, + ), + (dt.Array(dt.Timestamp()), dt.Array(dt.Timestamp(scale=6))), + ), + ], +) +def test_infer_numpy_array(numpy_array, expected_dtypes): + pytest.importorskip("pyarrow") + pandas_series = pd.Series(numpy_array) + assert dt.infer(numpy_array) in expected_dtypes + assert dt.infer(pandas_series) in expected_dtypes + + +def test_normalize_non_convertible_boolean(): + typ = dt.boolean + value = np.array([1, 2, 3]) + with pytest.raises(TypeError, match="Unable to normalize .+ to Boolean"): + dt.normalize(typ, value) diff --git a/ibis/expr/datatypes/tests/test_value.py b/ibis/expr/datatypes/tests/test_value.py index bdbbcf1c9782..72d857bff7c2 100644 --- a/ibis/expr/datatypes/tests/test_value.py +++ b/ibis/expr/datatypes/tests/test_value.py @@ -6,11 +6,8 @@ from collections import OrderedDict from datetime import date, datetime, timedelta -import numpy as np -import pandas as pd import pytest import pytz -from packaging.version import parse as vparse import ibis.expr.datatypes as dt @@ -31,12 +28,8 @@ class Foo(enum.Enum): (date.today(), dt.date), (datetime.now(), dt.timestamp), (timedelta(days=3), dt.Interval(unit="D")), - (pd.Timedelta("5 hours"), dt.Interval(unit="h")), - (pd.Timedelta("7 minutes"), dt.Interval(unit="m")), (timedelta(seconds=9), dt.Interval(unit="s")), - (pd.Timedelta("11 milliseconds"), dt.Interval(unit="ms")), (timedelta(microseconds=15), dt.Interval(unit="us")), - (pd.Timedelta("17 nanoseconds"), dt.Interval(unit="ns")), # numeric types (5, dt.int8), (5, dt.int8), @@ -88,24 +81,6 @@ class Foo(enum.Enum): ), ), (Foo.a, dt.Enum()), - # numpy types - (np.int8(5), dt.int8), - (np.int16(-1), dt.int16), - (np.int32(2), dt.int32), - (np.int64(-5), dt.int64), - (np.uint8(5), dt.uint8), - 
(np.uint16(50), dt.uint16), - (np.uint32(500), dt.uint32), - (np.uint64(5000), dt.uint64), - (np.float32(5.5), dt.float32), - (np.float64(5.55), dt.float64), - (np.bool_(True), dt.boolean), - (np.bool_(False), dt.boolean), - # pandas types - ( - pd.Timestamp("2015-01-01 12:00:00", tz="US/Eastern"), - dt.Timestamp("US/Eastern"), - ), ], ) def test_infer_dtype(value, expected_dtype): @@ -168,11 +143,6 @@ def test_infer_timedelta(): ), (datetime(2019, 1, 1), datetime(2019, 1, 1)), (datetime(2019, 1, 1, 1, 2, 3, 4), datetime(2019, 1, 1, 1, 2, 3, 4)), - (pd.Timestamp("2019-01-01"), datetime(2019, 1, 1)), - (pd.Timestamp("2019-01-01 00:00:00"), datetime(2019, 1, 1)), - (pd.Timestamp("2019-01-01 01:02:03.000004"), datetime(2019, 1, 1, 1, 2, 3, 4)), - (np.datetime64("2019-01-01"), datetime(2019, 1, 1)), - (np.datetime64("2019-01-01 01:02:03"), datetime(2019, 1, 1, 1, 2, 3)), ], ) def test_normalize_timestamp(value, expected): @@ -188,11 +158,6 @@ def test_normalize_timestamp(value, expected): ("2019-01-01 01:02:03.000004", date(2019, 1, 1)), (datetime(2019, 1, 1), date(2019, 1, 1)), (datetime(2019, 1, 1, 1, 2, 3, 4), date(2019, 1, 1)), - (pd.Timestamp("2019-01-01"), date(2019, 1, 1)), - (pd.Timestamp("2019-01-01 00:00:00"), date(2019, 1, 1)), - (pd.Timestamp("2019-01-01 01:02:03.000004"), date(2019, 1, 1)), - (np.datetime64("2019-01-01"), date(2019, 1, 1)), - (np.datetime64("2019-01-01 01:02:03"), date(2019, 1, 1)), ], ) def test_normalize_date(value, expected): @@ -214,125 +179,6 @@ def test_normalize_interval(dtype, value, expected): assert normalized == expected -@pytest.mark.parametrize( - ("value", "expected_dtype"), - [ - # numpy types - (np.int8(5), dt.int8), - (np.int16(-1), dt.int16), - (np.int32(2), dt.int32), - (np.int64(-5), dt.int64), - (np.uint8(5), dt.uint8), - (np.uint16(50), dt.uint16), - (np.uint32(500), dt.uint32), - (np.uint64(5000), dt.uint64), - (np.float32(5.5), dt.float32), - (np.float64(5.55), dt.float64), - (np.bool_(True), dt.boolean), - (np.bool_(False), dt.boolean), - # pandas types - ( - pd.Timestamp("2015-01-01 12:00:00", tz="US/Eastern"), - dt.Timestamp("US/Eastern"), - ), - ], -) -def test_infer_numpy_scalar(value, expected_dtype): - assert dt.infer(value) == expected_dtype - - -@pytest.mark.parametrize( - ("numpy_dtype", "ibis_dtype"), - [ - (np.bool_, dt.boolean), - (np.int8, dt.int8), - (np.int16, dt.int16), - (np.int32, dt.int32), - (np.int64, dt.int64), - (np.uint8, dt.uint8), - (np.uint16, dt.uint16), - (np.uint32, dt.uint32), - (np.uint64, dt.uint64), - (np.float16, dt.float16), - (np.float32, dt.float32), - (np.float64, dt.float64), - (np.double, dt.double), - (np.str_, dt.string), - (np.datetime64, dt.timestamp), - ], -) -def test_from_numpy_dtype(numpy_dtype, ibis_dtype): - numpy_dtype = np.dtype(numpy_dtype) - assert dt.DataType.from_numpy(numpy_dtype) == ibis_dtype - assert dt.dtype(numpy_dtype) == ibis_dtype - - -def test_from_numpy_timedelta(): - if vparse(pytest.importorskip("pyarrow").__version__) < vparse("9"): - pytest.skip("pyarrow < 9 globally mutates the timedelta64 numpy dtype") - - numpy_dtype = np.dtype(np.timedelta64) - assert dt.DataType.from_numpy(numpy_dtype) == dt.Interval("s") - assert dt.dtype(numpy_dtype) == dt.Interval("s") - - -@pytest.mark.parametrize( - ("numpy_array", "expected_dtypes"), - [ - # Explicitly-defined dtype - (np.array([1, 2, 3], dtype="int8"), (dt.Array(dt.int8),)), - (np.array([1, 2, 3], dtype="int16"), (dt.Array(dt.int16),)), - (np.array([1, 2, 3], dtype="int32"), (dt.Array(dt.int32),)), - (np.array([1, 2, 3], 
dtype="int64"), (dt.Array(dt.int64),)), - (np.array([1, 2, 3], dtype="uint8"), (dt.Array(dt.uint8),)), - (np.array([1, 2, 3], dtype="uint16"), (dt.Array(dt.uint16),)), - (np.array([1, 2, 3], dtype="uint32"), (dt.Array(dt.uint32),)), - (np.array([1, 2, 3], dtype="uint64"), (dt.Array(dt.uint64),)), - (np.array([1.0, 2.0, 3.0], dtype="float32"), (dt.Array(dt.float32),)), - (np.array([1.0, 2.0, 3.0], dtype="float64"), (dt.Array(dt.float64),)), - (np.array([True, False, True], dtype="bool"), (dt.Array(dt.boolean),)), - # Implicit dtype - # Integer array could be inferred to int64 or int32 depending on system - (np.array([1, 2, 3]), (dt.Array(dt.int64), dt.Array(dt.int32))), - (np.array([1.0, 2.0, 3.0]), (dt.Array(dt.float64),)), - (np.array([np.nan, np.nan, np.nan]), (dt.Array(dt.float64),)), - (np.array([True, False, True]), (dt.Array(dt.boolean),)), - (np.array(["1", "2", "3"]), (dt.Array(dt.string),)), - ( - np.array( - [ - pd.Timestamp("2015-01-01 12:00:00"), - pd.Timestamp("2015-01-02 12:00:00"), - pd.Timestamp("2015-01-03 12:00:00"), - ] - ), - (dt.Array(dt.Timestamp()), dt.Array(dt.Timestamp(scale=6))), - ), - # Implied from object dtype - (np.array([1, 2, 3], dtype=object), (dt.Array(dt.int64),)), - (np.array([1.0, 2.0, 3.0], dtype=object), (dt.Array(dt.float64),)), - (np.array([True, False, True], dtype=object), (dt.Array(dt.boolean),)), - (np.array(["1", "2", "3"], dtype=object), (dt.Array(dt.string),)), - ( - np.array( - [ - pd.Timestamp("2015-01-01 12:00:00"), - pd.Timestamp("2015-01-02 12:00:00"), - pd.Timestamp("2015-01-03 12:00:00"), - ], - dtype=object, - ), - (dt.Array(dt.Timestamp()), dt.Array(dt.Timestamp(scale=6))), - ), - ], -) -def test_infer_numpy_array(numpy_array, expected_dtypes): - pytest.importorskip("pyarrow") - pandas_series = pd.Series(numpy_array) - assert dt.infer(numpy_array) in expected_dtypes - assert dt.infer(pandas_series) in expected_dtypes - - def test_normalize_json(): obj = ["foo", {"bar": ("baz", None, 1.0, 2)}] expected = json.dumps(obj) @@ -350,13 +196,6 @@ def test_normalize_none_with_non_nullable_type(): dt.normalize(typ, None) -def test_normalize_non_convertible_boolean(): - typ = dt.boolean - value = np.array([1, 2, 3]) - with pytest.raises(TypeError, match="Unable to normalize .+ to Boolean"): - dt.normalize(typ, value) - - @pytest.mark.parametrize("bits", [8, 16, 32, 64]) @pytest.mark.parametrize("kind", ["uint", "int"]) def test_normalize_non_convertible_int(kind, bits): diff --git a/ibis/expr/tests/test_api.py b/ibis/expr/tests/test_api.py index 975e65121312..091ab3a259d6 100644 --- a/ibis/expr/tests/test_api.py +++ b/ibis/expr/tests/test_api.py @@ -3,7 +3,6 @@ import operator from datetime import datetime -import pandas as pd import pytest from dateutil.tz import tzoffset, tzutc from pytest import param @@ -78,28 +77,42 @@ def test_schema_from_names_and_typesield_names(): "UTC+01:00", id="from_string_millis_utc_+1_offset", ), + ], +) +def test_timestamp(string, expected_value, expected_timezone): + expr = ibis.timestamp(string) + op = expr.op() + assert isinstance(expr, ibis.expr.types.TimestampScalar) + assert op.value == expected_value + assert op.dtype == dt.Timestamp(timezone=expected_timezone) + + +@pytest.mark.parametrize( + ("string", "expected_value", "expected_timezone"), + [ param( - pd.Timestamp("2015-01-01 12:34:56.789"), + "2015-01-01 12:34:56.789", datetime(2015, 1, 1, 12, 34, 56, 789000), None, id="from_pandas_millis", ), param( - pd.Timestamp("2015-01-01 12:34:56.789", tz="UTC"), + "2015-01-01 12:34:56.789+00:00", 
             datetime(2015, 1, 1, 12, 34, 56, 789000, tzinfo=tzutc()),
             "UTC",
             id="from_pandas_millis_utc",
         ),
         param(
-            pd.Timestamp("2015-01-01 12:34:56.789+03:00"),
+            "2015-01-01 12:34:56.789+03:00",
             datetime(2015, 1, 1, 12, 34, 56, 789000, tzinfo=tzoffset(None, 10800)),
             "UTC+03:00",
             id="from_pandas_millis_+3_offset",
         ),
     ],
 )
-def test_timestamp(string, expected_value, expected_timezone):
-    expr = ibis.timestamp(string)
+def test_timestamp_pandas(string, expected_value, expected_timezone):
+    pd = pytest.importorskip("pandas")
+    expr = ibis.timestamp(pd.Timestamp(string))
     op = expr.op()
     assert isinstance(expr, ibis.expr.types.TimestampScalar)
     assert op.value == expected_value
@@ -122,6 +135,8 @@ def test_repr_deferred_with_exprs(f, sol):
 
 
 def test_duplicate_columns_in_memtable_not_allowed():
+    pd = pytest.importorskip("pandas")
+
     df = pd.DataFrame([[1, 2], [3, 4]], columns=["a", "a"])
 
     with pytest.raises(IbisInputError, match="Duplicate column names"):
diff --git a/ibis/expr/tests/test_format.py b/ibis/expr/tests/test_format.py
index ee8fed0a6f69..806eed536931 100644
--- a/ibis/expr/tests/test_format.py
+++ b/ibis/expr/tests/test_format.py
@@ -349,8 +349,6 @@ def multi_output_udf(v):
     expr = table.aggregate(multi_output_udf(table["col"]).destructure())
     result = repr(expr)
 
-    assert "sum: StructField(ReductionVectorizedUDF" in result
-    assert "mean: StructField(ReductionVectorizedUDF" in result
     snapshot.assert_match(result, "repr.txt")
 
 
@@ -371,6 +369,8 @@ def test_format_dummy_table(snapshot):
 
 
 def test_format_in_memory_table(snapshot):
+    pytest.importorskip("pandas")
+
     t = ibis.memtable([(1, 2), (3, 4), (5, 6)], columns=["x", "y"])
     expr = t.x.sum() + t.y.sum()
 
diff --git a/ibis/expr/tests/test_schema.py b/ibis/expr/tests/test_schema.py
index 3bd6f059e83b..ffb1efe1e39a 100644
--- a/ibis/expr/tests/test_schema.py
+++ b/ibis/expr/tests/test_schema.py
@@ -1,10 +1,8 @@
 from __future__ import annotations
 
-import contextlib
 from dataclasses import dataclass
 from typing import NamedTuple
 
-import numpy as np
 import pytest
 
 import ibis.expr.datatypes as dt
@@ -13,12 +11,6 @@
 from ibis.common.grounds import Annotable
 from ibis.common.patterns import CoercedTo
 
-has_pandas = False
-with contextlib.suppress(ImportError):
-    import pandas as pd
-
-    has_pandas = True
-
 
 def test_whole_schema():
     schema = {
@@ -169,6 +161,7 @@ def test_nullable_output():
 
 @pytest.fixture
 def df():
+    pd = pytest.importorskip("pandas")
     return pd.DataFrame({"A": pd.Series([1], dtype="int8"), "b": ["x"]})
 
 
@@ -435,6 +428,7 @@ def test_schema_from_to_polars_schema():
 
 
 def test_schema_from_to_numpy_dtypes():
+    np = pytest.importorskip("numpy")
     numpy_dtypes = [
         ("a", np.dtype("int64")),
         ("b", np.dtype("str")),
@@ -452,17 +446,10 @@ def test_schema_from_to_numpy_dtypes():
     assert restored_dtypes == expected_dtypes
 
 
-@pytest.mark.parametrize(
-    ("from_method", "to_method"),
-    [
-        pytest.param(
-            "from_pandas",
-            "to_pandas",
-            marks=pytest.mark.skipif(not has_pandas, reason="pandas not installed"),
-        ),
-    ],
-)
-def test_schema_from_to_pandas_dask_dtypes(from_method, to_method):
+def test_schema_from_to_pandas_dask_dtypes():
+    np = pytest.importorskip("numpy")
+    pd = pytest.importorskip("pandas")
+
     pandas_schema = pd.Series(
         [
             ("a", np.dtype("int64")),
             ("b", np.dtype("str")),
             ("c", np.dtype("bool")),
             ("d", pd.DatetimeTZDtype(tz="US/Eastern", unit="ns")),
         ]
     )
-    ibis_schema = getattr(sch.Schema, from_method)(pandas_schema)
+    ibis_schema = sch.Schema.from_pandas(pandas_schema)
     assert ibis_schema == sch.schema(pandas_schema)
 
     expected = sch.Schema(
         {
             "a": dt.int64,
             "b": dt.string,
             "c": dt.boolean,
             "d": dt.Timestamp("US/Eastern"),
         }
     )
     assert ibis_schema == expected
 
-    restored_dtypes = getattr(ibis_schema, to_method)()
+    restored_dtypes = ibis_schema.to_pandas()
     expected_dtypes = [
         ("a", np.dtype("int64")),
         ("b", np.dtype("object")),
diff --git a/ibis/expr/tests/test_sql.py b/ibis/expr/tests/test_sql.py
index 5ed847000304..b67bb1d4687f 100644
--- a/ibis/expr/tests/test_sql.py
+++ b/ibis/expr/tests/test_sql.py
@@ -4,6 +4,8 @@
 
 import ibis
 
+pytest.importorskip("black")
+
 catalog = {
     "employee": {"first_name": "string", "last_name": "string", "id": "int64"},
     "call": {
diff --git a/ibis/expr/types/core.py b/ibis/expr/types/core.py
index e948db76c5a8..fe2dea4e4d0a 100644
--- a/ibis/expr/types/core.py
+++ b/ibis/expr/types/core.py
@@ -6,9 +6,6 @@
 from typing import TYPE_CHECKING, Any, NoReturn
 
 from public import public
-from rich.console import Console
-from rich.jupyter import JupyterMixin
-from rich.text import Text
 
 import ibis
 import ibis.expr.operations as ops
@@ -20,7 +17,6 @@
 from ibis.config import _default_backend
 from ibis.config import options as opts
 from ibis.expr.format import pretty
-from ibis.expr.types.pretty import to_rich
 from ibis.util import experimental
 
 if TYPE_CHECKING:
@@ -31,19 +27,28 @@
     import polars as pl
     import pyarrow as pa
     import torch
+    from rich.console import Console
 
     import ibis.expr.types as ir
     from ibis.backends import BaseBackend
     from ibis.expr.visualize import EdgeAttributeGetter, NodeAttributeGetter
 
 
-class _FixedTextJupyterMixin(JupyterMixin):
-    """JupyterMixin adds a spurious newline to text, this fixes the issue."""
+try:
+    from rich.jupyter import JupyterMixin
+except ImportError:
 
-    def _repr_mimebundle_(self, *args, **kwargs):
-        bundle = super()._repr_mimebundle_(*args, **kwargs)
-        bundle["text/plain"] = bundle["text/plain"].rstrip()
-        return bundle
+    class _FixedTextJupyterMixin:
+        """No-op when rich is not installed."""
+else:
+
+    class _FixedTextJupyterMixin(JupyterMixin):
+        """JupyterMixin adds a spurious newline to text, this fixes the issue."""
+
+        def _repr_mimebundle_(self, *args, **kwargs):
+            bundle = super()._repr_mimebundle_(*args, **kwargs)
+            bundle["text/plain"] = bundle["text/plain"].rstrip()
+            return bundle
 
 
 @public
@@ -61,6 +66,8 @@ def _noninteractive_repr(self) -> str:
         return pretty(self.op(), scope=scope)
 
     def _interactive_repr(self) -> str:
+        from rich.console import Console
+
         console = Console(force_terminal=False)
         with console.capture() as capture:
             try:
@@ -96,8 +103,12 @@ def __rich_console__(self, console: Console, options):
 
         try:
             if opts.interactive:
+                from ibis.expr.types.pretty import to_rich
+
                 rich_object = to_rich(self, console_width=console_width)
             else:
+                from rich.text import Text
+
                 rich_object = Text(self._noninteractive_repr())
         except Exception as e:
             # In IPython exceptions inside of _repr_mimebundle_ are swallowed to
diff --git a/ibis/expr/types/generic.py b/ibis/expr/types/generic.py
index 473b74fc5d4c..57d4c0cf8c48 100644
--- a/ibis/expr/types/generic.py
+++ b/ibis/expr/types/generic.py
@@ -14,7 +14,6 @@
 from ibis.common.grounds import Singleton
 from ibis.expr.rewrites import rewrite_window_input
 from ibis.expr.types.core import Expr, _binop, _FixedTextJupyterMixin, _is_null_literal
-from ibis.expr.types.pretty import to_rich
 from ibis.util import deprecated, promote_list, warn_deprecated
 
 if TYPE_CHECKING:
@@ -1419,6 +1418,8 @@ def preview(
         │ …      │
         └────────┘
         """
+        from ibis.expr.types.pretty import to_rich
+
         return to_rich(
             self,
             max_rows=max_rows,
diff --git a/ibis/expr/types/relations.py b/ibis/expr/types/relations.py
index 40164077b7d5..df26d1fe634a 100644
--- a/ibis/expr/types/relations.py
+++ b/ibis/expr/types/relations.py
@@ -22,7 +22,6 @@
 from ibis.expr.rewrites import DerefMap
 from ibis.expr.types.core import Expr, _FixedTextJupyterMixin
 from ibis.expr.types.generic import Value, literal
-from ibis.expr.types.pretty import to_rich
 from ibis.expr.types.temporal import TimestampColumn
 from ibis.util import deprecated
 
@@ -529,6 +528,8 @@ def preview(
         │ …       │ …        │ … │
         └─────────┴──────────┴───┘
         """
+        from ibis.expr.types.pretty import to_rich
+
         return to_rich(
             self,
             max_columns=max_columns,
@@ -4343,8 +4344,6 @@ def pivot_wider(
         │ …     │ …        │ …        │ …        │
         └───────┴──────────┴──────────┴──────────┘
         """
-        import pandas as pd
-
         import ibis.selectors as s
         from ibis.expr.rewrites import _, p, x
 
@@ -4366,23 +4365,25 @@ def pivot_wider(
         if names is None:
             # no names provided, compute them from the data
             names = self.select(names_from).distinct().execute()
+            columns = names.columns.tolist()
+            names = list(names.itertuples(index=False))
         else:
             if not (columns := [col.get_name() for col in names_from.expand(self)]):
                 raise com.IbisInputError(
                     f"No matching names columns in `names_from`: {orig_names_from}"
                 )
-            names = pd.DataFrame(list(map(util.promote_list, names)), columns=columns)
+            names = list(map(tuple, map(util.promote_list, names)))
 
         if names_sort:
-            names = names.sort_values(by=names.columns.tolist())
+            names.sort()
 
         values_cols = values_from.expand(self)
         more_than_one_value = len(values_cols) > 1
         aggs = {}
 
-        names_cols_exprs = [self[col] for col in names.columns]
+        names_cols_exprs = [self[col] for col in columns]
 
-        for keys in names.itertuples(index=False):
+        for keys in names:
             where = ibis.and_(*map(operator.eq, names_cols_exprs, keys))
 
             for values_col in values_cols:
diff --git a/ibis/formats/pandas.py b/ibis/formats/pandas.py
index 37d6a9fc62e8..e536e0fe813d 100644
--- a/ibis/formats/pandas.py
+++ b/ibis/formats/pandas.py
@@ -2,7 +2,6 @@
 
 import contextlib
 import datetime
-import warnings
 from functools import partial
 from importlib.util import find_spec as _find_spec
 from typing import TYPE_CHECKING
@@ -24,14 +23,6 @@
     import polars as pl
     import pyarrow as pa
 
-_has_arrow_dtype = hasattr(pd, "ArrowDtype")
-
-if not _has_arrow_dtype:
-    warnings.warn(
-        f"The `ArrowDtype` class is not available in pandas {pd.__version__}. "
-        "Install pandas >= 1.5.0 for interop with pandas and arrow dtype support"
-    )
-
 geospatial_supported = _find_spec("geopandas") is not None
 
 
@@ -47,7 +38,7 @@ def to_ibis(cls, typ, nullable=True):
             return dt.String(nullable=nullable)
         return cls.to_ibis(typ.categories.dtype, nullable=nullable)
     elif pdt.is_extension_array_dtype(typ):
-        if _has_arrow_dtype and isinstance(typ, pd.ArrowDtype):
+        if isinstance(typ, pd.ArrowDtype):
             return PyArrowType.to_ibis(typ.pyarrow_dtype, nullable=nullable)
         else:
             name = typ.__class__.__name__.replace("Dtype", "")
diff --git a/ibis/formats/pyarrow.py b/ibis/formats/pyarrow.py
index 7decb43a34b7..27333b17af07 100644
--- a/ibis/formats/pyarrow.py
+++ b/ibis/formats/pyarrow.py
@@ -1,8 +1,10 @@
 from __future__ import annotations
 
-import functools
 from typing import TYPE_CHECKING, Any
 
+import pyarrow as pa
+import pyarrow_hotfix  # noqa: F401
+
 import ibis.expr.datatypes as dt
 from ibis.expr.schema import Schema
 from ibis.formats import DataMapper, SchemaMapper, TableProxy, TypeMapper
@@ -11,77 +13,63 @@
     from collections.abc import Sequence
 
     import polars as pl
-    import pyarrow as pa
-
-
-@functools.cache
-def _from_pyarrow_types():
-    import pyarrow as pa
-    import pyarrow_hotfix  # noqa: F401
-
-    return {
-        pa.int8(): dt.Int8,
-        pa.int16(): dt.Int16,
-        pa.int32(): dt.Int32,
-        pa.int64(): dt.Int64,
-        pa.uint8(): dt.UInt8,
-        pa.uint16(): dt.UInt16,
-        pa.uint32(): dt.UInt32,
-        pa.uint64(): dt.UInt64,
-        pa.float16(): dt.Float16,
-        pa.float32(): dt.Float32,
-        pa.float64(): dt.Float64,
-        pa.string(): dt.String,
-        pa.binary(): dt.Binary,
-        pa.bool_(): dt.Boolean,
-        pa.date32(): dt.Date,
-        pa.date64(): dt.Date,
-        pa.null(): dt.Null,
-        pa.string(): dt.String,
-        pa.large_binary(): dt.Binary,
-        pa.large_string(): dt.String,
-        pa.binary(): dt.Binary,
-    }
-
-
-@functools.cache
-def _to_pyarrow_types():
-    import pyarrow as pa
-    import pyarrow_hotfix  # noqa: F401
-
-    return {
-        dt.Null: pa.null(),
-        dt.Boolean: pa.bool_(),
-        dt.Binary: pa.binary(),
-        dt.Int8: pa.int8(),
-        dt.Int16: pa.int16(),
-        dt.Int32: pa.int32(),
-        dt.Int64: pa.int64(),
-        dt.UInt8: pa.uint8(),
-        dt.UInt16: pa.uint16(),
-        dt.UInt32: pa.uint32(),
-        dt.UInt64: pa.uint64(),
-        dt.Float16: pa.float16(),
-        dt.Float32: pa.float32(),
-        dt.Float64: pa.float64(),
-        dt.String: pa.string(),
-        dt.Binary: pa.binary(),
-        # assume unknown types can be converted into strings
-        dt.Unknown: pa.string(),
-        dt.MACADDR: pa.string(),
-        dt.INET: pa.string(),
-        dt.UUID: pa.string(),
-        dt.JSON: pa.string(),
-    }
+
+
+_from_pyarrow_types = {
+    pa.int8(): dt.Int8,
+    pa.int16(): dt.Int16,
+    pa.int32(): dt.Int32,
+    pa.int64(): dt.Int64,
+    pa.uint8(): dt.UInt8,
+    pa.uint16(): dt.UInt16,
+    pa.uint32(): dt.UInt32,
+    pa.uint64(): dt.UInt64,
+    pa.float16(): dt.Float16,
+    pa.float32(): dt.Float32,
+    pa.float64(): dt.Float64,
+    pa.string(): dt.String,
+    pa.binary(): dt.Binary,
+    pa.bool_(): dt.Boolean,
+    pa.date32(): dt.Date,
+    pa.date64(): dt.Date,
+    pa.null(): dt.Null,
+    pa.string(): dt.String,
+    pa.large_binary(): dt.Binary,
+    pa.large_string(): dt.String,
+    pa.binary(): dt.Binary,
+}
+
+
+_to_pyarrow_types = {
+    dt.Null: pa.null(),
+    dt.Boolean: pa.bool_(),
+    dt.Binary: pa.binary(),
+    dt.Int8: pa.int8(),
+    dt.Int16: pa.int16(),
+    dt.Int32: pa.int32(),
+    dt.Int64: pa.int64(),
+    dt.UInt8: pa.uint8(),
+    dt.UInt16: pa.uint16(),
+    dt.UInt32: pa.uint32(),
+    dt.UInt64: pa.uint64(),
+    dt.Float16: pa.float16(),
+    dt.Float32: pa.float32(),
+    dt.Float64: pa.float64(),
+    dt.String: pa.string(),
+    dt.Binary: pa.binary(),
+    # assume unknown types can be converted into strings
+    dt.Unknown: pa.string(),
+    dt.MACADDR: pa.string(),
+    dt.INET: pa.string(),
+    dt.UUID: pa.string(),
+    dt.JSON: pa.string(),
+}
 
 
 class PyArrowType(TypeMapper):
     @classmethod
     def to_ibis(cls, typ: pa.DataType, nullable=True) -> dt.DataType:
         """Convert a pyarrow type to an ibis type."""
-        import pyarrow as pa
-        import pyarrow_hotfix  # noqa: F401
-
         if pa.types.is_null(typ):
             return dt.null
         elif pa.types.is_decimal(typ):
@@ -162,14 +150,11 @@ def to_ibis(cls, typ: pa.DataType, nullable=True) -> dt.DataType:
 
             return dt.GeoSpatial(geotype, srid, nullable)
         else:
-            return _from_pyarrow_types()[typ](nullable=nullable)
+            return _from_pyarrow_types[typ](nullable=nullable)
 
     @classmethod
     def from_ibis(cls, dtype: dt.DataType) -> pa.DataType:
         """Convert an ibis type to a pyarrow type."""
-        import pyarrow as pa
-        import pyarrow_hotfix  # noqa: F401
-
         if dtype.is_decimal():
             # set default precision and scale to something; unclear how to choose this
             precision = 38 if dtype.precision is None else dtype.precision
@@ -247,7 +232,7 @@ def from_ibis(cls, dtype: dt.DataType) -> pa.DataType:
             return gat.wkb(crs=crs, edge_type=edge_type).to_pyarrow()
         else:
             try:
-                return _to_pyarrow_types()[type(dtype)]
+                return _to_pyarrow_types[type(dtype)]
             except KeyError:
                 raise NotImplementedError(
                     f"Converting {dtype} to pyarrow is not supported yet"
@@ -258,9 +243,6 @@ class PyArrowSchema(SchemaMapper):
     @classmethod
     def from_ibis(cls, schema: Schema) -> pa.Schema:
         """Convert a schema to a pyarrow schema."""
-        import pyarrow as pa
-        import pyarrow_hotfix  # noqa: F401
-
         fields = [
             pa.field(name, PyArrowType.from_ibis(dtype), nullable=dtype.nullable)
             for name, dtype in schema.items()
@@ -278,17 +260,11 @@ class PyArrowData(DataMapper):
     @classmethod
     def infer_scalar(cls, scalar: Any) -> dt.DataType:
         """Infer the ibis type of a scalar."""
-        import pyarrow as pa
-        import pyarrow_hotfix  # noqa: F401
-
         return PyArrowType.to_ibis(pa.scalar(scalar).type)
 
     @classmethod
     def infer_column(cls, column: Sequence) -> dt.DataType:
         """Infer the ibis type of a sequence."""
-        import pyarrow as pa
-        import pyarrow_hotfix  # noqa: F401
-
         if isinstance(column, pa.Array):
             return PyArrowType.to_ibis(column.type)
 
@@ -313,9 +289,6 @@ def infer_column(cls, column: Sequence) -> dt.DataType:
     @classmethod
     def infer_table(cls, table) -> Schema:
         """Infer the schema of a table."""
-        import pyarrow as pa
-        import pyarrow_hotfix  # noqa: F401
-
         if not isinstance(table, pa.Table):
             table = pa.table(table)
 
     @classmethod
     def convert_scalar(cls, scalar: pa.Scalar, dtype: dt.DataType) -> pa.Scalar:
-        import pyarrow as pa
-        import pyarrow_hotfix  # noqa: F401
-
         desired_type = PyArrowType.from_ibis(dtype)
         scalar_type = scalar.type
         if scalar_type != desired_type:
diff --git a/ibis/formats/tests/test_numpy.py b/ibis/formats/tests/test_numpy.py
index be36cb22cb78..1d29c80dbe3b 100644
--- a/ibis/formats/tests/test_numpy.py
+++ b/ibis/formats/tests/test_numpy.py
@@ -1,15 +1,17 @@
 from __future__ import annotations
 
 import hypothesis as h
-import hypothesis.extra.numpy as npst
 import hypothesis.strategies as st
-import numpy as np
 import pytest
 from packaging.version import parse as vparse
 
 import ibis.expr.datatypes as dt
 import ibis.tests.strategies as ibst
-from ibis.formats.numpy import NumpySchema, NumpyType
+
+np = pytest.importorskip("numpy")
+npst = pytest.importorskip("hypothesis.extra.numpy")
+
+from ibis.formats.numpy import NumpySchema, NumpyType  # noqa: E402
 
 
 roundtripable_types = st.deferred(
lambda: ( diff --git a/ibis/formats/tests/test_pandas.py b/ibis/formats/tests/test_pandas.py index a73a705c45d2..671d3be1b99c 100644 --- a/ibis/formats/tests/test_pandas.py +++ b/ibis/formats/tests/test_pandas.py @@ -3,18 +3,19 @@ from datetime import time from decimal import Decimal -import numpy as np -import pandas as pd -import pandas.testing as tm import pytest from pytest import param import ibis import ibis.expr.datatypes as dt import ibis.expr.schema as sch -from ibis.formats.pandas import PandasData, PandasSchema, PandasType pa = pytest.importorskip("pyarrow") +np = pytest.importorskip("numpy") +pd = pytest.importorskip("pandas") +tm = pytest.importorskip("pandas.testing") + +from ibis.formats.pandas import PandasData, PandasSchema, PandasType # noqa: E402 @pytest.mark.parametrize( diff --git a/ibis/legacy/udf/vectorized.py b/ibis/legacy/udf/vectorized.py index dec73adee4d9..4b662a19a61a 100644 --- a/ibis/legacy/udf/vectorized.py +++ b/ibis/legacy/udf/vectorized.py @@ -10,8 +10,6 @@ import functools from typing import TYPE_CHECKING, Any -import numpy as np - import ibis.expr.datatypes as dt import ibis.legacy.udf.validate as v from ibis.expr.operations import ( @@ -22,6 +20,7 @@ from ibis.util import deprecated if TYPE_CHECKING: + import numpy as np import pandas as pd @@ -50,6 +49,8 @@ def _coerce_to_np_array( - `np.ndarray` - `pd.Series` """ + import numpy as np + return np.array(data) @@ -84,6 +85,7 @@ def _coerce_to_series( Output Series """ + import numpy as np import pandas as pd if isinstance(data, (list, np.ndarray)): @@ -160,6 +162,7 @@ def _coerce_to_dataframe( 0 1 2 3 """ + import numpy as np import pandas as pd if isinstance(data, pd.DataFrame): diff --git a/ibis/tests/expr/test_pretty_repr.py b/ibis/tests/expr/test_pretty_repr.py index 547c0d537fa4..c39ccc6ca184 100644 --- a/ibis/tests/expr/test_pretty_repr.py +++ b/ibis/tests/expr/test_pretty_repr.py @@ -3,13 +3,16 @@ import datetime import decimal -import pandas as pd import pytest -from rich.console import Console import ibis import ibis.expr.datatypes as dt -from ibis.expr.types.pretty import format_column, format_values + +pytest.importorskip("rich") + +from ibis.expr.types.pretty import format_column, format_values # noqa: E402 + +pd = pytest.importorskip("pandas") null = "NULL" @@ -179,6 +182,8 @@ def test_all_empty_groups_repr(): def test_non_interactive_column_repr(): + from rich.console import Console + t = ibis.table(dict(names="string", values="int")) expr = t.names console = Console() diff --git a/ibis/tests/expr/test_table.py b/ibis/tests/expr/test_table.py index cd2718cd7678..db5a3bafda99 100644 --- a/ibis/tests/expr/test_table.py +++ b/ibis/tests/expr/test_table.py @@ -1,11 +1,8 @@ from __future__ import annotations -import datetime import pickle import re -import numpy as np -import pandas as pd import pytest from pytest import param @@ -1028,34 +1025,30 @@ def test_asof_join_with_by(): @pytest.mark.parametrize( ("ibis_interval", "timedelta_interval"), [ - [ibis.interval(days=2), pd.Timedelta("2 days")], - [ibis.interval(days=2), datetime.timedelta(days=2)], - [ibis.interval(hours=5), pd.Timedelta("5 hours")], - [ibis.interval(hours=5), datetime.timedelta(hours=5)], - [ibis.interval(minutes=7), pd.Timedelta("7 minutes")], - [ibis.interval(minutes=7), datetime.timedelta(minutes=7)], - [ibis.interval(seconds=9), pd.Timedelta("9 seconds")], - [ibis.interval(seconds=9), datetime.timedelta(seconds=9)], - [ibis.interval(milliseconds=11), pd.Timedelta("11 milliseconds")], - [ibis.interval(milliseconds=11), 
datetime.timedelta(milliseconds=11)], - [ibis.interval(microseconds=15), pd.Timedelta("15 microseconds")], - [ibis.interval(microseconds=15), datetime.timedelta(microseconds=15)], - [ibis.interval(nanoseconds=17), pd.Timedelta("17 nanoseconds")], + (ibis.interval(days=2), "2 days"), + (ibis.interval(hours=5), "5 hours"), + (ibis.interval(minutes=7), "7 minutes"), + (ibis.interval(seconds=9), "9 seconds"), + (ibis.interval(milliseconds=11), "11 milliseconds"), + (ibis.interval(microseconds=15), "15 microseconds"), + (ibis.interval(nanoseconds=17), "17 nanoseconds"), ], ) def test_asof_join_with_tolerance(ibis_interval, timedelta_interval): + pd = pytest.importorskip("pandas") + left = ibis.table([("time", "timestamp"), ("key", "int32"), ("value", "double")]) right = ibis.table([("time", "timestamp"), ("key", "int32"), ("value2", "double")]) - for interval in [ibis_interval, timedelta_interval]: + for interval in [ibis_interval, pd.Timedelta(timedelta_interval)] + [ + pd.Timedelta(timedelta_interval).to_pytimedelta() + ] * ("nanoseconds" not in timedelta_interval): joined = api.asof_join(left, right, "time", tolerance=interval) asof = left.asof_join(right, "time") filt = asof.filter( - [ - asof.time <= asof.time_right + interval, - asof.time >= asof.time_right - interval, - ] + asof.time <= asof.time_right + interval, + asof.time >= asof.time_right - interval, ) join = left.left_join(filt, [left.time == filt.time]) expected = join.select( @@ -1902,6 +1895,7 @@ def test_python_table_ambiguous(): def test_memtable_filter(): + pytest.importorskip("pandas") # Mostly just a smoketest, this used to error on construction t = ibis.memtable([(1, 2), (3, 4), (5, 6)], columns=["x", "y"]) expr = t.filter(t.x > 1) @@ -1919,14 +1913,6 @@ def test_default_backend_with_unbound_table(): assert expr.execute() -def test_numpy_ufuncs_dont_cast_tables(): - t = ibis.table(dict.fromkeys("abcd", "int")) - for arg in [np.int64(1), np.array([1, 2, 3])]: - for left, right in [(t, arg), (arg, t)]: - with pytest.raises(TypeError): - left + right - - def test_array_string_compare(): t = ibis.table(schema=dict(by="string", words="array"), name="t") expr = t[t.by == "foo"].mutate(words=_.words.unnest()).filter(_.words == "the") diff --git a/ibis/tests/expr/test_timestamp.py b/ibis/tests/expr/test_timestamp.py index f24bd91718f4..57018de566d7 100644 --- a/ibis/tests/expr/test_timestamp.py +++ b/ibis/tests/expr/test_timestamp.py @@ -2,8 +2,6 @@ from datetime import datetime, timedelta -import numpy as np -import pandas as pd import pytest import ibis @@ -53,15 +51,8 @@ def test_now(): assert isinstance(result.op(), ops.TimestampNow) -@pytest.mark.parametrize( - ("function", "value"), - [ - (ibis.timestamp, "2015-01-01 00:00:00"), - (ibis.literal, pd.Timestamp("2015-01-01 00:00:00")), - ], -) -def test_timestamp_literals(function, value): - expr = function(value) +def test_timestamp_literals(): + expr = ibis.timestamp("2015-01-01 00:00:00") assert isinstance(expr, ir.TimestampScalar) @@ -88,17 +79,23 @@ def test_comparisons_string(alltypes): def test_comparisons_pandas_timestamp(alltypes): + pd = pytest.importorskip("pandas") + val = pd.Timestamp("2015-01-01 00:00:00") expr = alltypes.i > val op = expr.op() + assert isinstance(op, ops.Greater) assert isinstance(op.right, ops.Literal) assert isinstance(op.right.dtype, dt.Timestamp) + expr = ibis.literal(val) < alltypes.i + op = expr.op() + assert isinstance(op, ops.Less) + assert isinstance(op.left, ops.Literal) + assert isinstance(op.left.dtype, dt.Timestamp) -def 
test_greater_comparison_pandas_timestamp(alltypes): - val = pd.Timestamp("2015-01-01 00:00:00") - expr2 = val < alltypes.i - op = expr2.op() + expr = val < alltypes.i + op = expr.op() assert isinstance(op, ops.Greater) assert isinstance(op.right, ops.Literal) assert isinstance(op.right.dtype, dt.Timestamp) @@ -164,26 +161,16 @@ def test_timestamp_field_access_on_time( assert isinstance(result.op(), expected_operation) -@pytest.mark.parametrize( - ("field", "expected_operation", "expected_type"), - [ - ("year", ops.ExtractYear, ir.IntegerColumn), - ("month", ops.ExtractMonth, ir.IntegerColumn), - ("day", ops.ExtractDay, ir.IntegerColumn), - ], -) -def test_timestamp_field_access_on_time_failure( - field, expected_operation, expected_type, alltypes -): +@pytest.mark.parametrize("field", ["year", "month", "day"]) +def test_timestamp_field_access_on_time_failure(field, alltypes): date_col = alltypes.i.time() with pytest.raises(AttributeError): getattr(date_col, field) -@pytest.mark.parametrize("value", [42, np.int64(42), np.int8(-42)]) -def test_integer_timestamp_fails(value): +def test_integer_timestamp_fails(): with pytest.raises(TypeError, match=r"Use ibis\.literal\(\.\.\.\)\.to_timestamp"): - ibis.timestamp(value) + ibis.timestamp(42) @pytest.mark.parametrize( diff --git a/ibis/tests/expr/test_value_exprs.py b/ibis/tests/expr/test_value_exprs.py index 0b493dd749c2..cc5a756437e5 100644 --- a/ibis/tests/expr/test_value_exprs.py +++ b/ibis/tests/expr/test_value_exprs.py @@ -9,7 +9,6 @@ from decimal import Decimal from operator import attrgetter, methodcaller -import numpy as np import pytest import pytz import toolz @@ -373,7 +372,7 @@ def test_isnan_isinf_column(table, column): assert isinstance(expr.op(), ops.IsInf) -@pytest.mark.parametrize("value", [1.3, np.nan, np.inf, -np.inf]) +@pytest.mark.parametrize("value", [1.3, float("nan"), float("inf"), -float("inf")]) def test_isnan_isinf_scalar(value): expr = ibis.literal(value).isnan() assert isinstance(expr, ir.BooleanScalar) @@ -1281,26 +1280,32 @@ def test_invalid_negate(value, expected_type): @pytest.mark.parametrize( "type", [ - np.float16, - np.float32, - np.float64, - np.int16, - np.int32, - np.int64, - np.int64, - np.int8, - np.timedelta64, - np.uint16, - np.uint32, - np.uint64, - np.uint64, - np.uint8, - float, - int, + "float16", + "float32", + "float64", + "int16", + "int32", + "int64", + "int64", + "int8", + "timedelta64", + "uint16", + "uint32", + "uint64", + "uint64", + "uint8", ], ) def test_valid_negate(type): - expr = ibis.literal(1) + np = pytest.importorskip("numpy") + typ = getattr(np, type) + expr = ibis.literal(typ(1)) + assert -expr is not None + + +@pytest.mark.parametrize("type", [float, int]) +def test_valid_negate_builtin(type): + expr = ibis.literal(type(1)) assert -expr is not None @@ -1627,6 +1632,8 @@ def test_deferred_nested_types(case): def test_numpy_ufuncs_dont_cast_columns(): + np = pytest.importorskip("numpy") + t = ibis.table(dict.fromkeys("abcd", "int")) # Adding a numpy array doesn't implicitly compute diff --git a/ibis/tests/expr/test_window_frames.py b/ibis/tests/expr/test_window_frames.py index 5560a3608501..5f6ac6dd0fb6 100644 --- a/ibis/tests/expr/test_window_frames.py +++ b/ibis/tests/expr/test_window_frames.py @@ -1,6 +1,5 @@ from __future__ import annotations -import numpy as np import pytest from pytest import param @@ -264,13 +263,17 @@ def test_window_api_properly_determines_how(): assert ibis.window(between=(None, 5)).how == "rows" assert ibis.window(between=(1, 3)).how == "rows" assert 
ibis.window(5).how == "rows" - assert ibis.window(np.int64(7)).how == "rows" assert ibis.window(ibis.interval(days=3)).how == "range" assert ibis.window(3.1).how == "range" assert ibis.window(following=3.14).how == "range" assert ibis.window(following=3).how == "rows" +def test_window_api_properly_determines_how_numpy(): + np = pytest.importorskip("numpy") + assert ibis.window(np.int64(7)).how == "rows" + + def test_window_api_mutually_exclusive_options(): with pytest.raises(IbisInputError): ibis.window(between=(None, 5), preceding=3) diff --git a/ibis/tests/strategies.py b/ibis/tests/strategies.py index 8d2e7cdb14d3..982289977d3b 100644 --- a/ibis/tests/strategies.py +++ b/ibis/tests/strategies.py @@ -3,9 +3,9 @@ import warnings import hypothesis as h -import hypothesis.extra.pandas as past import hypothesis.extra.pytz as tzst import hypothesis.strategies as st +import pytest import ibis import ibis.expr.datatypes as dt @@ -240,6 +240,9 @@ def schema(draw, item_strategy=_item_strategy, max_size=20): @st.composite def memtable(draw, schema=schema(primitive_dtypes)): # noqa: B008 + pytest.importorskip("pandas") + past = pytest.importorskip("hypothesis.extra.pandas") + schema = draw(schema) columns = [past.column(name, dtype=dtype) for name, dtype in schema.to_pandas()] diff --git a/ibis/tests/test_strategies.py b/ibis/tests/test_strategies.py index e771208a4cd7..fc0da95c8643 100644 --- a/ibis/tests/test_strategies.py +++ b/ibis/tests/test_strategies.py @@ -2,7 +2,6 @@ import hypothesis as h import hypothesis.strategies as st -import numpy as np import pytest import ibis @@ -157,11 +156,13 @@ def test_schema_array_dtype(schema): @h.given(its.primitive_dtypes()) def test_primitive_dtypes_to_pandas(dtype): + np = pytest.importorskip("numpy") assert isinstance(dtype.to_pandas(), np.dtype) @h.given(its.schema()) def test_schema_to_pandas(schema): + pytest.importorskip("pandas") pandas_schema = schema.to_pandas() assert len(pandas_schema) == len(schema) diff --git a/nix/ibis-core.nix b/nix/ibis-core.nix new file mode 100644 index 000000000000..2911c7b6fee4 --- /dev/null +++ b/nix/ibis-core.nix @@ -0,0 +1,14 @@ +{ poetry2nix +, python3 +, lib +, gitignoreSource +, graphviz-nox +, sqlite +, ibisTestingData +}: +let + mkApp = import ./ibis.nix { + inherit poetry2nix python3 lib gitignoreSource graphviz-nox sqlite ibisTestingData; + }; +in +mkApp { } diff --git a/nix/ibis-local.nix b/nix/ibis-local.nix new file mode 100644 index 000000000000..56009d3f1593 --- /dev/null +++ b/nix/ibis-local.nix @@ -0,0 +1,19 @@ +{ poetry2nix +, python3 +, lib +, gitignoreSource +, graphviz-nox +, sqlite +, ibisTestingData +}: +let + mkApp = import ./ibis.nix { + inherit poetry2nix python3 lib gitignoreSource graphviz-nox sqlite ibisTestingData; + }; +in +mkApp { + extras = [ "decompiler" "visualization" ]; + backends = [ "datafusion" "duckdb" "pandas" "polars" "sqlite" ] + # dask version has a show-stopping bug for Python >=3.11 + ++ lib.optionals (python3.pythonOlder "3.11") [ "dask" ]; +} diff --git a/nix/ibis.nix b/nix/ibis.nix index 61ea5c4ad0a8..5f430b3c71c6 100644 --- a/nix/ibis.nix +++ b/nix/ibis.nix @@ -6,21 +6,15 @@ , sqlite , ibisTestingData }: -let - # pyspark could be added here, but it doesn't handle parallel test execution - # well and serially it takes on the order of 7-8 minutes to execute serially - backends = [ "datafusion" "duckdb" "pandas" "polars" "sqlite" ] - # dask version has a show-stopping bug for Python >=3.11 - ++ lib.optionals (python3.pythonOlder "3.11") [ "dask" ]; - markers = 
lib.concatStringsSep " or " (backends ++ [ "core" ]);
-in
-poetry2nix.mkPoetryApplication rec {
+# pyspark could be added here, but it doesn't handle parallel test execution
+# well, and serially it takes on the order of 7-8 minutes to execute
+{ backends ? [ ], extras ? [ ] }: poetry2nix.mkPoetryApplication {
   python = python3;
   groups = [ ];
   checkGroups = [ "test" ];
   projectDir = gitignoreSource ../.;
   src = gitignoreSource ../.;
-  extras = backends ++ [ "decompiler" "visualization" ];
+  extras = backends ++ extras;
   overrides = [
     (import ../poetry-overrides.nix)
     poetry2nix.defaultPoetryOverrides
@@ -30,7 +24,8 @@ poetry2nix.mkPoetryApplication rec {

   POETRY_DYNAMIC_VERSIONING_BYPASS = "1";

-  nativeCheckInputs = [ graphviz-nox sqlite ];
+  nativeCheckInputs = lib.optionals (lib.elem "sqlite" backends) [ sqlite ]
+    ++ lib.optionals (lib.elem "visualization" extras) [ graphviz-nox ];

   preCheck = ''
     set -euo pipefail
@@ -41,15 +36,19 @@ poetry2nix.mkPoetryApplication rec {
     ln -s "${ibisTestingData}" $PWD/ci/ibis-testing-data
   '';

-  checkPhase = ''
-    set -euo pipefail
+  checkPhase =
+    let
+      markers = lib.concatStringsSep " or " (backends ++ [ "core" ]);
+    in
+    ''
+      set -euo pipefail

-    runHook preCheck
+      runHook preCheck

-    pytest -m '${markers}' --numprocesses "$NIX_BUILD_CORES" --dist loadgroup
+      pytest -m '${markers}' --numprocesses "$NIX_BUILD_CORES" --dist loadgroup

-    runHook postCheck
-  '';
+      runHook postCheck
+    '';

   doCheck = true;

diff --git a/nix/overlay.nix b/nix/overlay.nix
index 3c1a10e7cfcb..bd56111f150c 100644
--- a/nix/overlay.nix
+++ b/nix/overlay.nix
@@ -25,9 +25,13 @@ in
       sha256 = "sha256-1fenQNQB+Q0pbb0cbK2S/UIwZDE4PXXG15MH3aVbyLU=";
     };

-  ibis310 = pkgs.callPackage ./ibis.nix { python3 = pkgs.python310; };
-  ibis311 = pkgs.callPackage ./ibis.nix { python3 = pkgs.python311; };
-  ibis312 = pkgs.callPackage ./ibis.nix { python3 = pkgs.python312; };
+  ibisCore310 = pkgs.callPackage ./ibis-core.nix { python3 = pkgs.python310; };
+  ibisCore311 = pkgs.callPackage ./ibis-core.nix { python3 = pkgs.python311; };
+  ibisCore312 = pkgs.callPackage ./ibis-core.nix { python3 = pkgs.python312; };
+
+  ibisLocal310 = pkgs.callPackage ./ibis-local.nix { python3 = pkgs.python310; };
+  ibisLocal311 = pkgs.callPackage ./ibis-local.nix { python3 = pkgs.python311; };
+  ibisLocal312 = pkgs.callPackage ./ibis-local.nix { python3 = pkgs.python312; };

   ibisDevEnv310 = mkPoetryDevEnv pkgs.python310;
   ibisDevEnv311 = mkPoetryDevEnv pkgs.python311;
diff --git a/poetry.lock b/poetry.lock
index b933ea8cf712..53738ac01c21 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -7760,33 +7760,33 @@ cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\
cffi = ["cffi (>=1.11)"]

[extras]
-bigquery = ["db-dtypes", "google-cloud-bigquery", "google-cloud-bigquery-storage", "pyarrow", "pyarrow-hotfix", "pydata-google-auth"]
-clickhouse = ["clickhouse-connect", "pyarrow", "pyarrow-hotfix"]
-dask = ["dask", "packaging", "pyarrow", "pyarrow-hotfix", "regex"]
-datafusion = ["datafusion", "pyarrow", "pyarrow-hotfix"]
+bigquery = ["db-dtypes", "google-cloud-bigquery", "google-cloud-bigquery-storage", "numpy", "pandas", "pyarrow", "pyarrow-hotfix", "pydata-google-auth", "rich"]
+clickhouse = ["clickhouse-connect", "numpy", "pandas", "pyarrow", "pyarrow-hotfix", "rich"]
+dask = ["dask", "numpy", "packaging", "pandas", "pyarrow", "pyarrow-hotfix", "regex", "rich"]
+datafusion = ["datafusion", "numpy", "pandas", "pyarrow", "pyarrow-hotfix", "rich"]
 decompiler = ["black"]
 deltalake = ["deltalake"]
-druid = 
["pyarrow", "pyarrow-hotfix", "pydruid"] -duckdb = ["duckdb", "pyarrow", "pyarrow-hotfix"] +druid = ["numpy", "pandas", "pyarrow", "pyarrow-hotfix", "pydruid", "rich"] +duckdb = ["duckdb", "numpy", "pandas", "pyarrow", "pyarrow-hotfix", "rich"] examples = ["fsspec", "pins"] -exasol = ["pyarrow", "pyarrow-hotfix", "pyexasol"] -flink = ["pyarrow", "pyarrow-hotfix"] +exasol = ["numpy", "pandas", "pyarrow", "pyarrow-hotfix", "pyexasol", "rich"] +flink = ["numpy", "pandas", "pyarrow", "pyarrow-hotfix", "rich"] geospatial = ["geoarrow-types", "geopandas", "pyproj", "shapely"] -impala = ["impyla", "pyarrow", "pyarrow-hotfix"] -mssql = ["pyarrow", "pyarrow-hotfix", "pyodbc"] -mysql = ["pyarrow", "pyarrow-hotfix", "pymysql"] -oracle = ["oracledb", "pyarrow", "pyarrow-hotfix"] -pandas = ["packaging", "pyarrow", "pyarrow-hotfix", "regex"] -polars = ["polars", "pyarrow", "pyarrow-hotfix"] -postgres = ["psycopg2", "pyarrow", "pyarrow-hotfix"] -pyspark = ["packaging", "pyarrow", "pyarrow-hotfix", "pyspark"] -risingwave = ["psycopg2", "pyarrow", "pyarrow-hotfix"] -snowflake = ["pyarrow", "pyarrow-hotfix", "snowflake-connector-python"] -sqlite = ["pyarrow", "pyarrow-hotfix", "regex"] -trino = ["pyarrow", "pyarrow-hotfix", "trino"] +impala = ["impyla", "numpy", "pandas", "pyarrow", "pyarrow-hotfix", "rich"] +mssql = ["numpy", "pandas", "pyarrow", "pyarrow-hotfix", "pyodbc", "rich"] +mysql = ["numpy", "pandas", "pyarrow", "pyarrow-hotfix", "pymysql", "rich"] +oracle = ["numpy", "oracledb", "packaging", "pandas", "pyarrow", "pyarrow-hotfix", "rich"] +pandas = ["numpy", "packaging", "pandas", "pyarrow", "pyarrow-hotfix", "regex", "rich"] +polars = ["numpy", "packaging", "pandas", "polars", "pyarrow", "pyarrow-hotfix", "rich"] +postgres = ["numpy", "pandas", "psycopg2", "pyarrow", "pyarrow-hotfix", "rich"] +pyspark = ["numpy", "packaging", "pandas", "pyarrow", "pyarrow-hotfix", "pyspark", "rich"] +risingwave = ["numpy", "pandas", "psycopg2", "pyarrow", "pyarrow-hotfix", "rich"] +snowflake = ["numpy", "pandas", "pyarrow", "pyarrow-hotfix", "rich", "snowflake-connector-python"] +sqlite = ["numpy", "pandas", "pyarrow", "pyarrow-hotfix", "regex", "rich"] +trino = ["numpy", "pandas", "pyarrow", "pyarrow-hotfix", "rich", "trino"] visualization = ["graphviz"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "e7d5e50095adedffcb009420bd275ad64d37ded89f5270fa0cf612f01f1df266" +content-hash = "96eeb24a6d09280de35c84f663b9fc3c1d77c06671fcb9604cf1bf81c704c0a6" diff --git a/pyproject.toml b/pyproject.toml index 6b0d70b2273f..181f9ccb9c0b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,17 +38,17 @@ classifiers = [ [tool.poetry.dependencies] python = "^3.10" atpublic = ">=2.3,<6" -numpy = ">=1.23.2,<3" -pandas = ">=1.5.3,<3" parsy = ">=2,<3" -pyarrow = { version = ">=10.0.1,<18", optional = true } -pyarrow-hotfix = { version = ">=0.4,<1", optional = true } python-dateutil = ">=2.8.2,<3" pytz = ">=2022.7" -rich = ">=12.4.4,<14" sqlglot = ">=23.4,<25.10" toolz = ">=0.11,<1" typing-extensions = ">=4.3.0,<5" +numpy = { version = ">=1.23.2,<3", optional = true } +pandas = { version = ">=1.5.3,<3", optional = true } +pyarrow = { version = ">=10.0.1,<18", optional = true } +pyarrow-hotfix = { version = ">=0.4,<1", optional = true } +rich = { version = ">=12.4.4,<14", optional = true } black = { version = ">=22.1.0,<25", optional = true } clickhouse-connect = { version = ">=0.5.23,<1", optional = true, extras = [ "arrow", @@ -150,26 +150,98 @@ bigquery = [ "pyarrow", "pyarrow-hotfix", 
"pydata-google-auth", + "numpy", + "pandas", + "rich", +] +clickhouse = [ + "clickhouse-connect", + "pyarrow", + "pyarrow-hotfix", + "numpy", + "pandas", + "rich", +] +dask = [ + "dask", + "regex", + "packaging", + "pyarrow", + "pyarrow-hotfix", + "numpy", + "pandas", + "rich", +] +datafusion = [ + "datafusion", + "pyarrow", + "pyarrow-hotfix", + "numpy", + "pandas", + "rich", +] +druid = ["pydruid", "pyarrow", "pyarrow-hotfix", "numpy", "pandas", "rich"] +duckdb = ["duckdb", "pyarrow", "pyarrow-hotfix", "numpy", "pandas", "rich"] +exasol = ["pyexasol", "pyarrow", "pyarrow-hotfix", "numpy", "pandas", "rich"] +flink = ["pyarrow", "pyarrow-hotfix", "numpy", "pandas", "rich"] +impala = ["impyla", "pyarrow", "pyarrow-hotfix", "numpy", "pandas", "rich"] +mssql = ["pyodbc", "pyarrow", "pyarrow-hotfix", "numpy", "pandas", "rich"] +mysql = ["pymysql", "pyarrow", "pyarrow-hotfix", "numpy", "pandas", "rich"] +oracle = [ + "oracledb", + "packaging", + "pyarrow", + "pyarrow-hotfix", + "numpy", + "pandas", + "rich", +] +pandas = [ + "regex", + "packaging", + "pyarrow", + "pyarrow-hotfix", + "numpy", + "pandas", + "rich", +] +polars = [ + "polars", + "packaging", + "pyarrow", + "pyarrow-hotfix", + "numpy", + "pandas", + "rich", +] +postgres = ["psycopg2", "pyarrow", "pyarrow-hotfix", "numpy", "pandas", "rich"] +pyspark = [ + "pyspark", + "packaging", + "pyarrow", + "pyarrow-hotfix", + "numpy", + "pandas", + "rich", +] +snowflake = [ + "snowflake-connector-python", + "pyarrow", + "pyarrow-hotfix", + "numpy", + "pandas", + "rich", +] +sqlite = ["regex", "pyarrow", "pyarrow-hotfix", "numpy", "pandas", "rich"] +risingwave = [ + "psycopg2", + "pyarrow", + "pyarrow-hotfix", + "numpy", + "pandas", + "rich", ] -clickhouse = ["clickhouse-connect", "pyarrow", "pyarrow-hotfix"] -dask = ["dask", "regex", "packaging", "pyarrow", "pyarrow-hotfix"] -datafusion = ["datafusion", "pyarrow", "pyarrow-hotfix"] -druid = ["pydruid", "pyarrow", "pyarrow-hotfix"] -duckdb = ["duckdb", "pyarrow", "pyarrow-hotfix"] -exasol = ["pyexasol", "pyarrow", "pyarrow-hotfix"] -flink = ["pyarrow", "pyarrow-hotfix"] -impala = ["impyla", "pyarrow", "pyarrow-hotfix"] -mssql = ["pyodbc", "pyarrow", "pyarrow-hotfix"] -mysql = ["pymysql", "pyarrow", "pyarrow-hotfix"] -oracle = ["oracledb", "pyarrow", "pyarrow-hotfix"] -pandas = ["regex", "packaging", "pyarrow", "pyarrow-hotfix"] -polars = ["polars", "pyarrow", "pyarrow-hotfix"] -postgres = ["psycopg2", "pyarrow", "pyarrow-hotfix"] -pyspark = ["pyspark", "packaging", "pyarrow", "pyarrow-hotfix"] -snowflake = ["snowflake-connector-python", "pyarrow", "pyarrow-hotfix"] -sqlite = ["regex", "pyarrow", "pyarrow-hotfix"] -risingwave = ["psycopg2", "pyarrow", "pyarrow-hotfix"] -trino = ["trino", "pyarrow", "pyarrow-hotfix"] +trino = ["trino", "pyarrow", "pyarrow-hotfix", "numpy", "pandas", "rich"] # non-backend extras visualization = ["graphviz"] decompiler = ["black"]